This repository has been archived by the owner on Sep 13, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathshen-wei-tai-hu-zhi-guang-chu-ti-yan.html
627 lines (516 loc) · 84.4 KB
/
shen-wei-tai-hu-zhi-guang-chu-ti-yan.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>神威·太湖之光初体验</title>
<link href="/feeds/all.atom.xml" type="application/atom+xml" rel="alternate" title="华科美团点评技术俱乐部 Full Atom Feed" />
<link href="/feeds/chao-suan.atom.xml" type="application/atom+xml" rel="alternate" title="华科美团点评技术俱乐部 Categories Atom Feed" />
<!-- Bootstrap Core CSS -->
<link href="/theme/css/bootstrap.min.css" rel="stylesheet">
<!-- Custom CSS -->
<link href="/theme/css/clean-blog.min.css" rel="stylesheet">
<!-- Code highlight color scheme -->
<link href="/theme/css/code_blocks/darkly.css" rel="stylesheet">
<!-- Custom Fonts -->
<link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css" rel="stylesheet" type="text/css">
<link href='https://fonts.googleapis.com/css?family=Lora:400,700,400italic,700italic' rel='stylesheet' type='text/css'>
<link href='https://fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800' rel='stylesheet' type='text/css'>
<!-- HTML5 Shim and Respond.js IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
<script src="https://oss.maxcdn.com/libs/respond.js/1.4.2/respond.min.js"></script>
<![endif]-->
<meta name="description" content="神威·太湖之光初体验...">
<meta name="author" content="Yifeng Tang">
<meta name="tags" content="contest">
<meta name="tags" content="parallelization">
<meta property="og:locale" content="zh_CN">
<meta property="og:site_name" content="华科美团点评技术俱乐部">
<meta property="og:type" content="article">
<meta property="article:author" content="/author/yifeng-tang.html">
<meta property="og:url" content="/shen-wei-tai-hu-zhi-guang-chu-ti-yan.html">
<meta property="og:title" content="神威·太湖之光初体验">
<meta property="article:published_time" content="2017-04-06T16:05:24+08:00">
<meta property="og:description" content="神威·太湖之光初体验...">
<meta property="og:image" content="/images/bg.jpg">
</head>
<body>
<!-- Navigation -->
<nav class="navbar navbar-default navbar-custom navbar-fixed-top">
<div class="container-fluid">
<!-- Brand and toggle get grouped for better mobile display -->
<div class="navbar-header page-scroll">
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="/">华科美团点评技术俱乐部</a>
</div>
<!-- Collect the nav links, forms, and other content for toggling -->
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav navbar-right">
<li><a href="/categories.html">分类</a></li>
<li><a href="/archives.html">归档</a></li>
<li><a href="/authors.html">作者</a></li>
<li><a href="/tags.html">标签</a></li>
<li><a href="/pages/about/index.html">关于</a></li>
<li><a href="/pages/friendlinks/index.html">友链</a></li>
</ul>
</div>
<!-- /.navbar-collapse -->
</div>
<!-- /.container -->
</nav>
<!-- Page Header -->
<header class="intro-header" style="background-image: url('/images/bg.jpg')">
<div class="container">
<div class="row">
<div class="col-lg-8 col-lg-offset-2 col-md-10 col-md-offset-1">
<div class="post-heading">
<h1>神威·太湖之光初体验</h1>
<span class="meta">Posted by
<a href="/author/yifeng-tang.html">Yifeng Tang</a>
on 2017年 4月06日 周四
</span>
</div>
</div>
</div>
</div>
</header>
<!-- Main Content -->
<div class="container">
<div class="row">
<div class="col-lg-8 col-lg-offset-2 col-md-10 col-md-offset-1">
<!-- Post Content -->
<article>
<h1>神威·太湖之光初体验</h1>
<p>在ASC17的比赛中,本菜鸡负责了MASNUM(一个从上世纪90年代就开始发展的海浪模拟应用)在神威·太湖之光上的优化任务(虽然最后并没能在要求时间内跑完最后一个workload。。账号一开始被赛委会删了可能也是一个原因(?!!)。。),并且借此机会也尝试了一下神威的速度,同时踩了上面的许许多多的坑。</p>
<h2>神威的结构</h2>
<p><img alt="神威的结构图" src="/images/2017_4_30_1.png"></p>
<p>神威采用的是完全国产的SW26010处理器,其中每一个处理器有四个核组(CGs),这些核组之间是通过一个片上网络(NoC)互相连接的。每一个CG包含了一个内存控制器(MC)、一个管理处理核心(MPE,即主核)和一个计算处理核心集群(CPEs,即从核阵列)。其中MPE支持完整的中断功能、内存管理、超标量和乱序发射/执行,擅长处理管理、任务调度和数据通信,而CPEs旨在最大限度地提高聚合计算吞吐量,同时最小化微架构的复杂性。相对于MPE,CPE不支持中断。</p>
<!--more-->
<p>其中MPE主频为1.45GHz,每一个核组内存8GB,L1缓存32KB,L2缓存256KB,核组之间一般通过MPI进行通讯。从核主频为1.45GHz,每一个从核分别有64KB的局部存储空间(LDM),从核可以通过gld/gst离散访问或者DMA批量访问主存,从核阵列之间可以通过寄存器通信进行低延迟的通信。有意思的是,CPE的LDM在栈空间很大时,可能会使得栈空间、局存、动态分配的空间相互冲突,如下图所示。如果想要在发生冲突的时候报错,需要加入-preserve preserve_size编译选项。</p>
<p><img alt="LDM的使用情况" src="/images/2017_4_30_2.png"></p>
<h2>MASNUM的优化思路</h2>
<p>由于在去年的SC大会上,一篇关于在神威上对MASNUM的优化的论文成功进入了Gordon Bell Prize的提名,所以几乎所有的队伍都是在做着复现这篇论文的工作。</p>
<p><a href="http://dblp2.uni-trier.de/db/conf/sc/sc2016.html">A highly effective global surface wave numerical simulation with ultra-high resolution. 46-56</a></p>
<p>论文上实现了三部分的优化,不过就算是做得最好的清华似乎也只是实现了其中的第二部分,简单点说就是把所有的运算全部扔到CPEs中进行并行计算,而我们也是在试图实现第二部分的优化。</p>
<p>经过分析可以发现其中有两个子程序implsch、propagat是整个程序的热点(最后交卷的时候只让填这两个子程序的运行时间也能看出来这点)。其中implsch是很容易优化的,因为implsch使用的所有数组数据都并没有超出循环变量的范围,通过分组映射发射之类的办法很容易就能把相应数据填入CPEs之中,从而榨干CPE的所有局部内存。然而propagat我们到最后也没能实现,因为其中有一个数据规模随着输入平方级增加的四维数组在程序上表现为是乱序使用数据的,在程序运行之前是无法预测可能需要哪一个数据的。。最后试着每次循环都离散读取所需的8个变量,运行时间反而增加了4倍。。从清华他们那里打听了消息,原来这8个变量就是一个九宫格中间那一个点周围的8个点,但是我们只是一开始猜想了一下并没有仔细推论。。(同队的大佬一开始就说可能是在计算微分方程,但我和另一个哥们都没能引起重视继续向下思考,毕竟这程序写得就像那一坨那啥)</p>
<h2>神威上的加速库(Fortran下的)</h2>
<p>神威上的加速库的目标差不多,就是将程序划分为host、slave两块,然后让MPE运行host的内容,CPE运行slave的内容。从编译器必须加 -host 、 -slave 或者 -hybrid 也能看出来这一点。</p>
<h3>Open-ACC*</h3>
<p>神威上的Open-ACC*是在Open-ACC 2.0的基础上进行魔改之后的东西。虽然我们和清华交流的时候统一了这东西不知道有什么意义、只会越跑越慢的意见,但是本菜鸡自己还是觉得可能是我们的打开方式都有点不对,不过现在也没办法试验,只能把其中遇见的问题再留在这里。</p>
<ol>
<li>
<p>似乎没能够将数组很好地分块</p>
<p>对于一个四维数组例如e(a, b, c, d),如果关键循环变量是a,b的话,Open-ACC*似乎没有办法能够将a,b下的其他两个维度的数据拿出来,就算能够拿出来也似乎没有办法将在a,b两个维度上进行分段批量读取。(至少文档上完全没提到)</p>
</li>
<li>
<p>似乎只能把数组按照循环变量进行分块</p>
<p>但有一些例如propagat这样需要获取附近的8个点的数据的操作,Open-ACC*可能很难实现。</p>
</li>
<li>
<p>文档上的很多看似很简单的功能实际上极难实现</p>
<p>反正菜鸡我照着这个文档做,基本都是在无限报错。</p>
</li>
<li>
<p>加速区只能是循环,且不允许两个加速区的并列</p>
<p>这一点就真的有点烦躁,有些时候你将会需要一些例如一部分CPE进行读取、一部分CPE进行转发、一部分CPE进行计算这样的复杂功能,然后就会发现Open-ACC*就不能执行这样的复杂操作。</p>
</li>
</ol>
<p>综上所述,我建议如果你的程序是对数组读取非常简单,或者是数组很小,并且不需要让每一个CPE分别有自己特殊的职责的话,可以选择使用Open-ACC*来进行优化。</p>
<p>菜鸡我一开始就是使用Open-ACC*来进行的优化,优化的最好效果是让时间翻倍。。。(不过还是有队伍使用Open-ACC成功进行了优化,我确实很好奇到底是怎样完成的。。或许从一开始就已经分出了胜负)</p>
<h3>Athread</h3>
<p>神威上的Athread的文档全是用C来写的,然而MASNUM却是一个Fortran写的程序,我们一开始以为应该C有的API、Fortran也应该都有,然而真相是非常的残酷。所以以后一定要把全部代码用C重写一次,否则会感到非常绝望。(至少要混合编译将C的API给包装过来)</p>
<p>Athread是一个相对于Open-ACC*来说更加靠谱的库,它的功能更加复杂且强大,不过我们也只是用了最基础的几个操作就完成了对implsch的优化,并且使得它的运行速度提高了100倍(从15000s的运行时间到了150s)。在这里就大致介绍一下关键的函数或者说子程序。</p>
<h4>host部分</h4>
<ol>
<li>
<p>athread_init()</p>
<p>启动整个加速线程库,可以选择在程序最开始就初始化。说不定有什么性能的影响,菜鸡我也没办法再试验了。</p>
</li>
<li>
<p>athread_spawn(fun, address)</p>
<p>创建线程组,其中第一个参数是相应slave程序的函数,一般会写到另一个文件然后用external来调用,第二个参数是参数的起始地址。可以感觉到如果有多个参数将会非常的不方便,所以就用全局变量进行传递其实更好一些,具体效率在下也没测试。</p>
</li>
<li>
<p>athread_join()</p>
<p>阻塞等待线程组的全部结束。这个函数可以在MPE经过其他运算之后再调用,这样就实现了更进一步的并行。</p>
</li>
<li>
<p>athread_halt()</p>
<p>将整个加速线程库全部关闭,可以丢在程序最后面去。</p>
</li>
</ol>
<h4>slave部分</h4>
<p>slave文件中是通过/Common/区来进行对主存的直接访问,其中动态数组只能全部改成pointer然后来进行共享。然而CPEs内部的共享在下没能找到对应的API以及方法,提供的OpenMP预编译指令也没能按照文档上所写的实现它的功能。。希望下一次能够找到方法。</p>
<ol>
<li>
<p>get_myid()</p>
<p>这个最关键的获取cpe自己的逻辑顺序的API在Fortran里面其实是没有的。。。赛委会给我们现场拷来了C语言的实现,然后混合编译进行调用。。真有意思。。(源代码我就不上传了,还是有点虚)</p>
</li>
<li>
<p>athread_get(0, src, des, len, reply, 0, 0, 0)</p>
<p>这几个0其实都有自己的意义,但是我们一般都不会用到。。想仔细用的话就查查文档吧,可能会有进一步的提升。src是指经过common区共享的主内存的数据,des是指LDM中对应的数据位置,len是以字节计算的长度,reply是一个计数变量,会在get成功之后自增1。例程上面使用了简单的do while来进行等待,但是实际操作时,神威的工程师告诉我们,编译器可能会优化掉do while里面的reply从而使得整个程序卡住。所以尽量将do while写到另一个文件里面然后编译这个文件时加上-O0或者是-g(神威里面的-g默认-O0),从而避免这个优化。</p>
</li>
<li>
<p>athread_put(0, src, des, len, reply, 0, 0)</p>
<p>除了des与src是反着的,其他基本同上。</p>
</li>
<li>
<p>ldm_malloc</p>
<p>不好意思,Fortran里面没有,不过也可以完全避免动态数组的出现。</p>
</li>
</ol>
<h2>具体并行思路</h2>
<p>由于程序就是循环构成的,并行思路也比较简单,大致如下:</p>
<ol>
<li>
<p>先估算每一个点所需要的内存大小;</p>
</li>
<li>
<p>用64KB除以估算的内存大小,就可以得到最多能够一次存下的点的数量;</p>
</li>
<li>
<p>通过总点数/(点的数量*64)(64是一个核组中CPE的个数),就可以得到把所有数据全部计算一遍所需要的大循环的次数;</p>
</li>
<li>
<p>每一次批量读取数据进行计算再批量取出即可;</p>
</li>
<li>
<p>某些变量以及固定长度的数组每一个CPE在一开始一起读入即可。</p>
</li>
</ol>
<p>具体关键映射操作如下Java代码所示(Fortran数组是从1开始计算的)</p>
<p>其中ixs、ixl、iys、iyl是点的范围,max_cpe_size是估算的最大点数,how_many是最大能够平均分配的循环次数,left_part是余下数量。</p>
<p>计算之后steps是当前需要计算的长度,简单的从1到steps循环即可。</p>
<div class="highlight"><pre><span></span><span class="kd">public</span> <span class="kd">class</span> <span class="nc">Test</span> <span class="o">{</span>
<span class="kd">public</span> <span class="kd">static</span> <span class="kt">void</span> <span class="nf">main</span><span class="o">(</span><span class="n">String</span><span class="o">[]</span> <span class="n">args</span><span class="o">)</span> <span class="o">{</span>
<span class="kt">int</span> <span class="n">ixl</span> <span class="o">=</span> <span class="mi">10000</span><span class="o">,</span> <span class="n">iyl</span> <span class="o">=</span> <span class="mi">10</span><span class="o">;</span>
<span class="kt">int</span> <span class="n">ixs</span> <span class="o">=</span> <span class="mi">1</span><span class="o">,</span> <span class="n">iys</span> <span class="o">=</span> <span class="mi">1</span><span class="o">;</span>
<span class="kt">int</span> <span class="n">max_cpe_size</span> <span class="o">=</span> <span class="mi">350</span><span class="o">;</span>
<span class="kt">int</span> <span class="n">how_many</span> <span class="o">=</span> <span class="o">(</span><span class="n">iyl</span> <span class="o">-</span> <span class="n">iys</span> <span class="o">+</span> <span class="mi">1</span><span class="o">)</span> <span class="o">*</span> <span class="o">(</span><span class="n">ixl</span> <span class="o">-</span> <span class="n">ixs</span> <span class="o">+</span> <span class="mi">1</span><span class="o">)</span> <span class="o">/</span> <span class="o">(</span><span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="o">);</span>
<span class="kt">int</span> <span class="n">left_part</span> <span class="o">=</span> <span class="o">(</span><span class="n">iyl</span> <span class="o">-</span> <span class="n">iys</span> <span class="o">+</span> <span class="mi">1</span><span class="o">)</span> <span class="o">*</span> <span class="o">(</span><span class="n">ixl</span> <span class="o">-</span> <span class="n">ixs</span> <span class="o">+</span> <span class="mi">1</span><span class="o">)</span> <span class="o">-</span> <span class="n">how_many</span> <span class="o">*</span> <span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="o">;</span>
<span class="kt">int</span> <span class="n">steps</span> <span class="o">=</span> <span class="mi">0</span><span class="o">,</span> <span class="n">ia</span> <span class="o">=</span> <span class="mi">0</span><span class="o">,</span> <span class="n">ic</span> <span class="o">=</span> <span class="mi">0</span><span class="o">,</span> <span class="n">my_id</span><span class="o">,</span> <span class="n">step_base</span><span class="o">,</span> <span class="n">step_left</span><span class="o">;</span>
<span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">big_loop</span> <span class="o">=</span> <span class="mi">1</span><span class="o">;</span> <span class="n">big_loop</span> <span class="o"><=</span> <span class="n">how_many</span> <span class="o">+</span> <span class="mi">1</span><span class="o">;</span> <span class="n">big_loop</span><span class="o">++)</span> <span class="o">{</span>
<span class="k">for</span> <span class="o">(</span><span class="n">my_id</span> <span class="o">=</span> <span class="mi">1</span><span class="o">;</span> <span class="n">my_id</span> <span class="o"><=</span> <span class="mi">64</span><span class="o">;</span> <span class="n">my_id</span><span class="o">++)</span> <span class="o">{</span>
<span class="k">if</span> <span class="o">(</span><span class="n">big_loop</span> <span class="o">!=</span> <span class="n">how_many</span> <span class="o">+</span> <span class="mi">1</span><span class="o">)</span> <span class="o">{</span>
<span class="n">steps</span> <span class="o">=</span> <span class="n">max_cpe_size</span><span class="o">;</span>
<span class="n">ic</span> <span class="o">=</span> <span class="o">(((</span><span class="n">big_loop</span> <span class="o">-</span> <span class="mi">1</span><span class="o">)</span> <span class="o">*</span> <span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="o">)</span> <span class="o">+</span> <span class="o">(</span><span class="n">my_id</span> <span class="o">-</span> <span class="mi">1</span><span class="o">)</span> <span class="o">*</span> <span class="n">max_cpe_size</span><span class="o">)</span> <span class="o">/</span> <span class="o">(</span><span class="n">ixl</span> <span class="o">-</span> <span class="n">ixs</span> <span class="o">+</span> <span class="mi">1</span><span class="o">);</span>
<span class="n">ia</span> <span class="o">=</span> <span class="o">(((</span><span class="n">big_loop</span> <span class="o">-</span> <span class="mi">1</span><span class="o">)</span> <span class="o">*</span> <span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="o">)</span> <span class="o">+</span> <span class="o">(</span><span class="n">my_id</span> <span class="o">-</span> <span class="mi">1</span><span class="o">)</span> <span class="o">*</span> <span class="n">max_cpe_size</span><span class="o">)</span> <span class="o">-</span> <span class="n">ic</span> <span class="o">*</span> <span class="o">(</span><span class="n">ixl</span> <span class="o">-</span> <span class="n">ixs</span> <span class="o">+</span> <span class="mi">1</span><span class="o">);</span>
<span class="o">}</span> <span class="k">else</span> <span class="o">{</span>
<span class="n">step_base</span> <span class="o">=</span> <span class="n">left_part</span> <span class="o">/</span> <span class="mi">64</span><span class="o">;</span>
<span class="n">step_left</span> <span class="o">=</span> <span class="n">left_part</span> <span class="o">-</span> <span class="n">step_base</span> <span class="o">*</span> <span class="mi">64</span><span class="o">;</span>
<span class="k">if</span> <span class="o">(</span><span class="n">my_id</span> <span class="o"><=</span> <span class="n">step_left</span><span class="o">)</span> <span class="o">{</span>
<span class="n">steps</span> <span class="o">=</span> <span class="n">step_base</span> <span class="o">+</span> <span class="mi">1</span><span class="o">;</span>
<span class="n">ic</span> <span class="o">=</span> <span class="o">((</span><span class="n">how_many</span> <span class="o">*</span> <span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="o">)</span> <span class="o">+</span> <span class="o">(</span><span class="n">my_id</span> <span class="o">-</span> <span class="mi">1</span><span class="o">)</span> <span class="o">*</span> <span class="n">steps</span><span class="o">)</span> <span class="o">/</span> <span class="o">(</span><span class="n">ixl</span> <span class="o">-</span> <span class="n">ixs</span> <span class="o">+</span> <span class="mi">1</span><span class="o">);</span>
<span class="n">ia</span> <span class="o">=</span> <span class="o">((</span><span class="n">how_many</span> <span class="o">*</span> <span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="o">)</span> <span class="o">+</span> <span class="o">(</span><span class="n">my_id</span> <span class="o">-</span> <span class="mi">1</span><span class="o">)</span> <span class="o">*</span> <span class="n">steps</span><span class="o">)</span> <span class="o">-</span> <span class="n">ic</span> <span class="o">*</span> <span class="o">(</span><span class="n">ixl</span> <span class="o">-</span> <span class="n">ixs</span> <span class="o">+</span> <span class="mi">1</span><span class="o">);</span>
<span class="o">}</span> <span class="k">else</span> <span class="o">{</span>
<span class="n">steps</span> <span class="o">=</span> <span class="n">step_base</span><span class="o">;</span>
<span class="n">ic</span> <span class="o">=</span> <span class="o">((</span><span class="n">how_many</span> <span class="o">*</span> <span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="o">)</span> <span class="o">+</span> <span class="o">(</span><span class="n">step_left</span><span class="o">)</span> <span class="o">*</span> <span class="o">(</span><span class="n">step_base</span> <span class="o">+</span> <span class="mi">1</span><span class="o">)</span> <span class="o">+</span> <span class="o">(</span><span class="n">my_id</span> <span class="o">-</span> <span class="n">step_left</span> <span class="o">-</span> <span class="mi">1</span><span class="o">)</span> <span class="o">*</span> <span class="n">steps</span><span class="o">)</span> <span class="o">/</span> <span class="o">(</span><span class="n">ixl</span> <span class="o">-</span> <span class="n">ixs</span> <span class="o">+</span> <span class="mi">1</span><span class="o">);</span>
<span class="n">ia</span> <span class="o">=</span> <span class="o">((</span><span class="n">how_many</span> <span class="o">*</span> <span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="o">)</span> <span class="o">+</span> <span class="o">(</span><span class="n">step_left</span><span class="o">)</span> <span class="o">*</span> <span class="o">(</span><span class="n">step_base</span> <span class="o">+</span> <span class="mi">1</span><span class="o">)</span> <span class="o">+</span> <span class="o">(</span><span class="n">my_id</span> <span class="o">-</span> <span class="n">step_left</span> <span class="o">-</span> <span class="mi">1</span><span class="o">)</span> <span class="o">*</span> <span class="n">steps</span><span class="o">)</span> <span class="o">-</span> <span class="n">ic</span> <span class="o">*</span> <span class="o">(</span><span class="n">ixl</span> <span class="o">-</span> <span class="n">ixs</span> <span class="o">+</span> <span class="mi">1</span><span class="o">);</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="n">ic</span> <span class="o">=</span> <span class="n">ic</span> <span class="o">+</span> <span class="mi">1</span><span class="o">;</span>
<span class="n">ia</span> <span class="o">=</span> <span class="n">ia</span> <span class="o">+</span> <span class="mi">1</span><span class="o">;</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="o">}</span>
</pre></div>
<h2>出错整理</h2>
<p>神威的Exception非常的不详细,甚至他们的工程师自己都在吐槽,不过本菜鸡经过认真的测试,大致发现了以下的对应的规律。</p>
<ol>
<li>
<p>No SPE Exception</p>
<p>可能是栈空间不足。</p>
</li>
<li>
<p>Floating Point Exception</p>
<p>可能是程序计算映射出错,导致内存溢出到非法区域。</p>
</li>
<li>
<p>DMA Desxxxxxxxx? Exception</p>
<p>可能是循环的时候未处理steps = 0的情况。</p>
</li>
<li>
<p>Unknown Exception</p>
<p>呃。。别问我发生了什么</p>
</li>
<li>
<p>程序在athread_get卡住</p>
<p>检查-cgsp -b参数,或者加上-O0参数进行测试</p>
</li>
</ol>
<h2>资料出处</h2>
<p><a href="http://www.nsccwx.cn">国家超级计算无锡中心官网</a></p>
<h2>优化后实例代码</h2>
<p>源文件:</p>
<div class="highlight"><pre><span></span><span class="c">!-------------------------------------------------------------------------------</span>
<span class="c">!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@</span>
<span class="c">!-------------------------------------------------------------------------------</span>
<span class="c">!*DeckYinxq: mean2</span>
<span class="c">! [原注释前半部分因GBK编码损坏无法恢复],用于计算源函数</span>
<span class="k">subroutine </span><span class="n">mean2</span>
<span class="k">implicit none</span>
<span class="c">!-------------------------------------------------------------------------------</span>
<span class="kt">integer</span> <span class="kd">::</span> <span class="n">k</span><span class="p">,</span> <span class="n">k1</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">i1</span><span class="p">,</span> <span class="n">j</span>
<span class="kt">real</span> <span class="kd">::</span> <span class="n">dwkk</span><span class="p">,</span> <span class="n">wfk</span><span class="p">,</span> <span class="n">wfk1</span><span class="p">,</span> <span class="n">wsk</span><span class="p">,</span> <span class="n">wsk1</span><span class="p">,</span> <span class="n">wkk</span><span class="p">,</span> <span class="n">wkk1</span><span class="p">,</span> <span class="n">ekj</span><span class="p">,</span> <span class="n">ekj1</span>
<span class="c">!-------------------------------------------------------------------------------</span>
<span class="k">do </span><span class="mi">100</span> <span class="n">ic</span><span class="o">=</span><span class="n">iys</span><span class="p">,</span><span class="n">iyl</span>
<span class="k">do </span><span class="mi">100</span> <span class="n">ia</span><span class="o">=</span><span class="n">ixs</span><span class="p">,</span><span class="n">ixl</span>
<span class="n">ae</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">=</span><span class="mf">0.</span>
<span class="n">asi</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">=</span><span class="mf">0.</span>
<span class="n">awf</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">=</span><span class="mf">0.</span>
<span class="n">awk</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">=</span><span class="mf">0.</span>
<span class="n">ark</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">=</span><span class="mf">0.</span>
<span class="k">if</span><span class="p">(</span><span class="n">nsp</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">).</span><span class="n">ne</span><span class="p">.</span><span class="mi">1</span><span class="p">)</span> <span class="k">cycle</span>
<span class="k"> do </span><span class="mi">200</span> <span class="n">k</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span><span class="n">kld</span>
<span class="n">k1</span><span class="o">=</span><span class="n">k</span><span class="o">+</span><span class="mi">1</span>
<span class="n">i</span><span class="o">=</span><span class="n">k</span><span class="o">-</span><span class="n">kl</span><span class="o">+</span><span class="mi">1</span>
<span class="n">i1</span><span class="o">=</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span>
<span class="n">dwkk</span><span class="o">=</span><span class="n">dwk</span><span class="p">(</span><span class="n">k</span><span class="p">)</span>
<span class="n">wfk</span><span class="o">=</span><span class="n">wf</span><span class="p">(</span><span class="n">k</span><span class="p">,</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span>
<span class="n">wfk1</span><span class="o">=</span><span class="n">wf</span><span class="p">(</span><span class="n">k1</span><span class="p">,</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span>
<span class="c">! wfk=fr(k)</span>
<span class="c">! wfk1=fr(k1)</span>
<span class="n">wsk</span><span class="o">=</span><span class="n">zpi</span><span class="o">*</span><span class="n">wfk</span>
<span class="n">wsk1</span><span class="o">=</span><span class="n">zpi</span><span class="o">*</span><span class="n">wfk1</span>
<span class="n">wkk</span><span class="o">=</span><span class="n">wk</span><span class="p">(</span><span class="n">k</span><span class="p">)</span>
<span class="n">wkk1</span><span class="o">=</span><span class="n">wk</span><span class="p">(</span><span class="n">k1</span><span class="p">)</span>
<span class="k">do </span><span class="mi">200</span> <span class="n">j</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span><span class="n">jl</span>
<span class="k">if</span> <span class="p">(</span><span class="n">k</span><span class="p">.</span><span class="n">lt</span><span class="p">.</span><span class="n">kl</span><span class="p">)</span> <span class="k">then</span>
<span class="k"> </span><span class="n">ekj</span><span class="o">=</span><span class="n">e</span><span class="p">(</span><span class="n">k</span><span class="p">,</span><span class="n">j</span><span class="p">,</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span>
<span class="n">ekj1</span><span class="o">=</span><span class="n">e</span><span class="p">(</span><span class="n">k1</span><span class="p">,</span><span class="n">j</span><span class="p">,</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span>
<span class="k">else</span>
<span class="k"> </span><span class="n">ekj</span><span class="o">=</span><span class="n">e</span><span class="p">(</span><span class="n">kl</span><span class="p">,</span><span class="n">j</span><span class="p">,</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">*</span><span class="n">wkh</span><span class="p">(</span><span class="n">i</span><span class="p">)</span>
<span class="n">ekj1</span><span class="o">=</span><span class="n">e</span><span class="p">(</span><span class="n">kl</span><span class="p">,</span><span class="n">j</span><span class="p">,</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">*</span><span class="n">wkh</span><span class="p">(</span><span class="n">i1</span><span class="p">)</span>
<span class="n">endif</span>
<span class="n">ae</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">=</span><span class="n">ae</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">+</span><span class="p">(</span><span class="n">ekj</span><span class="o">+</span><span class="n">ekj1</span><span class="p">)</span><span class="o">*</span><span class="n">dwkk</span>
<span class="n">awf</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">=</span><span class="n">awf</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">+</span><span class="p">(</span><span class="n">ekj</span><span class="o">*</span><span class="n">wfk</span><span class="o">+</span><span class="n">ekj1</span><span class="o">*</span><span class="n">wfk1</span><span class="p">)</span><span class="o">*</span><span class="n">dwkk</span>
<span class="n">asi</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">=</span><span class="n">asi</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">+</span><span class="p">(</span><span class="n">ekj</span><span class="o">/</span><span class="n">wsk</span><span class="o">+</span><span class="n">ekj1</span><span class="o">/</span><span class="n">wsk1</span><span class="p">)</span><span class="o">*</span><span class="n">dwkk</span>
<span class="n">awk</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">=</span><span class="n">awk</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">+</span><span class="p">(</span><span class="n">ekj</span><span class="o">*</span><span class="n">wkk</span><span class="o">+</span><span class="n">ekj1</span><span class="o">*</span><span class="n">wkk1</span><span class="p">)</span><span class="o">*</span><span class="n">dwkk</span>
<span class="n">ark</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">=</span><span class="n">ark</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">+</span><span class="p">(</span><span class="n">ekj</span><span class="o">/</span><span class="nb">sqrt</span><span class="p">(</span><span class="n">wkk</span><span class="p">)</span><span class="o">+</span><span class="n">ekj1</span><span class="o">/</span><span class="nb">sqrt</span><span class="p">(</span><span class="n">wkk1</span><span class="p">))</span><span class="o">*</span><span class="n">dwkk</span>
<span class="mi">200</span> <span class="k">continue</span>
<span class="k"> </span><span class="n">asi</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">=</span><span class="n">ae</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">/</span><span class="n">asi</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span>
<span class="n">awf</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">=</span><span class="n">awf</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">/</span><span class="n">ae</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span>
<span class="n">awk</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">=</span><span class="n">awk</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">/</span><span class="n">ae</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span>
<span class="n">ark</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">=</span><span class="p">(</span><span class="n">ae</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">)</span><span class="o">/</span><span class="n">ark</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span><span class="n">ic</span><span class="p">))</span><span class="o">**</span><span class="mi">2</span>
<span class="mi">100</span> <span class="k">continue</span>
<span class="c">!-------------------------------------------------------------------------------</span>
<span class="k">return</span>
<span class="c">!-------------------------------------------------------------------------------</span>
<span class="k">end subroutine </span><span class="n">mean2</span>
<span class="c">!-------------------------------------------------------------------------------</span>
<span class="c">!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@</span>
<span class="c">!-------------------------------------------------------------------------------</span>
</pre></div>
<p>host代码:</p>
<!--
  Listing: host-side Fortran subroutine mean2 (the preceding paragraph labels it as the host code).
  What the listing shows:
    * how_many and left_part are stored in the common blocks /mean2_host_hm/ and
      /mean2_host_lp/. The mean2_slave listing further down declares the same two
      common blocks and copies both values into local variables.
    * how_many is the product (iyl-iys+1)*(ixl-ixs+1) divided by max_cpe_size*64,
      where max_cpe_size is the parameter 35. The result is stored in an integer.
    * left_part is whatever remains of that product after subtracting
      how_many*max_cpe_size*64.
    * The routine then calls athread_spawn(slave_mean2_slave, 1) followed by athread_join().
  NOTE(review): iys, iyl, ixs and ixl are used under "implicit none" but are not declared in
  this listing. Presumably they come from an enclosing module or an omitted use statement. Confirm.
  NOTE(review): the factor 64 looks like the number of slave cores sharing the work. Confirm.
-->
<div class="highlight"><pre><span></span><span class="k">subroutine </span><span class="n">mean2</span>
<span class="k">implicit none</span>
<span class="k"> </span><span class="kt">integer</span> <span class="kd">::</span> <span class="n">how_many</span><span class="p">,</span> <span class="n">left_part</span>
<span class="kt">integer</span><span class="p">,</span><span class="k">external</span><span class="kd">::</span><span class="n">slave_mean2_slave</span>
<span class="kt">integer</span><span class="p">,</span> <span class="k">parameter</span> <span class="kd">::</span> <span class="n">max_cpe_size</span> <span class="o">=</span> <span class="mi">35</span>
<span class="k">common</span> <span class="o">/</span><span class="n">mean2_host_hm</span><span class="o">/</span> <span class="n">how_many</span>
<span class="k">common</span> <span class="o">/</span><span class="n">mean2_host_lp</span><span class="o">/</span> <span class="n">left_part</span>
<span class="n">how_many</span> <span class="o">=</span> <span class="p">(</span><span class="n">iyl</span> <span class="o">-</span> <span class="n">iys</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="p">(</span><span class="n">ixl</span> <span class="o">-</span> <span class="n">ixs</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="p">)</span><!-- count of full rounds of max_cpe_size*64 grid points; the slave listing loops big_loop from 1 to how_many + 1 -->
<span class="n">left_part</span> <span class="o">=</span> <span class="p">(</span><span class="n">iyl</span> <span class="o">-</span> <span class="n">iys</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="p">(</span><span class="n">ixl</span> <span class="o">-</span> <span class="n">ixs</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">-</span> <span class="n">how_many</span> <span class="o">*</span> <span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><!-- grid points left over after the full rounds; the slave listing splits these in its final big_loop pass -->
<span class="k">call </span><span class="n">athread_spawn</span><span class="p">(</span><span class="n">slave_mean2_slave</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span><!-- NOTE(review): slave_mean2_slave is presumably the linker name of the mean2_slave subroutine shown in the next listing; the meaning of the second argument 1 is not visible here. Confirm. -->
<span class="k">call </span><span class="n">athread_join</span><span class="p">()</span><!-- presumably blocks until the spawned slave routine has finished. Confirm against the athread documentation. -->
<span class="k">return</span>
<span class="k">end subroutine </span><span class="n">mean2</span>
</pre></div>
<p>slave代码:</p>
<div class="highlight"><pre><span></span><span class="k">subroutine </span><span class="n">mean2_slave</span>
<span class="k">implicit none</span>
<span class="k"> </span><span class="kt">integer</span> <span class="kd">::</span> <span class="n">my_id</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="n">fak</span>
<span class="kt">integer</span> <span class="kd">::</span> <span class="n">how_many</span><span class="p">,</span> <span class="n">left_part</span>
<span class="kt">integer</span> <span class="kd">::</span> <span class="n">big_loop</span><span class="p">,</span> <span class="n">steps</span><span class="p">,</span> <span class="n">step_left</span><span class="p">,</span> <span class="n">step_base</span>
<span class="kt">integer</span> <span class="kd">::</span> <span class="n">k</span><span class="p">,</span> <span class="n">k1</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">i1</span><span class="p">,</span> <span class="n">j</span><span class="p">,</span> <span class="n">ia</span><span class="p">,</span> <span class="n">ic</span>
<span class="kt">integer</span> <span class="kd">::</span> <span class="nb">count</span>
<span class="nb"> </span><span class="kt">real</span> <span class="kd">::</span> <span class="n">dwkk</span><span class="p">,</span> <span class="n">wfk</span><span class="p">,</span> <span class="n">wfk1</span><span class="p">,</span> <span class="n">wsk</span><span class="p">,</span> <span class="n">wsk1</span><span class="p">,</span> <span class="n">wkk</span><span class="p">,</span> <span class="n">wkk1</span><span class="p">,</span> <span class="n">ekj</span><span class="p">,</span> <span class="n">ekj1</span>
<span class="kt">integer</span><span class="p">,</span> <span class="k">parameter</span> <span class="kd">::</span> <span class="n">kld</span> <span class="o">=</span> <span class="mi">30</span>
<span class="kt">integer</span><span class="p">,</span> <span class="k">parameter</span> <span class="kd">::</span> <span class="n">kl</span> <span class="o">=</span> <span class="mi">25</span>
<span class="kt">integer</span><span class="p">,</span> <span class="k">parameter</span> <span class="kd">::</span> <span class="n">jl</span> <span class="o">=</span> <span class="mi">12</span>
<span class="kt">integer</span><span class="p">,</span> <span class="k">parameter</span> <span class="kd">::</span> <span class="n">zpi</span> <span class="o">=</span> <span class="mf">3.1415926</span> <span class="o">*</span> <span class="mi">2</span>
<span class="kt">integer</span><span class="p">,</span> <span class="k">parameter</span> <span class="kd">::</span> <span class="n">max_cpe_size</span> <span class="o">=</span> <span class="mi">35</span>
<span class="kt">integer</span> <span class="kd">::</span> <span class="n">iys</span><span class="p">,</span> <span class="n">iyl</span><span class="p">,</span> <span class="n">ixs</span><span class="p">,</span> <span class="n">ixl</span>
<span class="kt">real</span><span class="p">,</span> <span class="k">pointer</span> <span class="kd">::</span> <span class="n">ae</span><span class="p">(:,:),</span> <span class="n">asi</span><span class="p">(:,:),</span> <span class="n">awf</span><span class="p">(:,:),</span> <span class="n">awk</span><span class="p">(:,:),</span> <span class="n">ark</span><span class="p">(:,:),</span> <span class="n">nsp</span><span class="p">(:,:)</span>
<span class="kt">real</span><span class="p">,</span> <span class="k">pointer</span> <span class="kd">::</span> <span class="n">wf</span><span class="p">(:,</span> <span class="p">:,</span> <span class="p">:)</span>
<span class="kt">real</span><span class="p">,</span> <span class="k">pointer</span> <span class="kd">::</span> <span class="n">e</span><span class="p">(:,</span> <span class="p">:,</span> <span class="p">:,</span> <span class="p">:)</span>
<span class="kt">real</span> <span class="kd">::</span> <span class="n">dwk</span><span class="p">(</span><span class="mi">31</span><span class="p">),</span> <span class="n">wk</span><span class="p">(</span><span class="mi">31</span><span class="p">),</span> <span class="n">wkh</span><span class="p">(</span><span class="mi">31</span><span class="p">)</span>
<span class="kt">integer</span> <span class="kd">::</span> <span class="n">how_many_slave</span><span class="p">,</span> <span class="n">left_part_slave</span>
<span class="kt">integer</span> <span class="kd">::</span> <span class="n">iys_slave</span><span class="p">,</span> <span class="n">iyl_slave</span><span class="p">,</span> <span class="n">ixs_slave</span><span class="p">,</span> <span class="n">ixl_slave</span>
<span class="kt">real</span> <span class="kd">::</span> <span class="n">dwk_slave</span><span class="p">(</span><span class="mi">31</span><span class="p">),</span> <span class="n">wk_slave</span><span class="p">(</span><span class="mi">31</span><span class="p">),</span> <span class="n">wkh_slave</span><span class="p">(</span><span class="mi">31</span><span class="p">)</span>
<span class="kt">real</span> <span class="kd">::</span> <span class="n">ae_slave</span><span class="p">(</span><span class="mi">35</span><span class="p">),</span> <span class="n">asi_slave</span><span class="p">(</span><span class="mi">35</span><span class="p">),</span> <span class="n">awf_slave</span><span class="p">(</span><span class="mi">35</span><span class="p">),</span> <span class="n">awk_slave</span><span class="p">(</span><span class="mi">35</span><span class="p">),</span> <span class="n">ark_slave</span><span class="p">(</span><span class="mi">35</span><span class="p">),</span> <span class="n">nsp_slave</span><span class="p">(</span><span class="mi">35</span><span class="p">)</span>
<span class="kt">real</span> <span class="kd">::</span> <span class="n">wf_slave</span><span class="p">(</span><span class="mi">35</span> <span class="o">*</span> <span class="mi">31</span><span class="p">),</span> <span class="n">e_slave</span><span class="p">(</span><span class="mi">35</span> <span class="o">*</span> <span class="mi">25</span> <span class="o">*</span> <span class="mi">12</span><span class="p">)</span>
<span class="kt">integer</span><span class="p">,</span><span class="k">external</span><span class="kd">::</span><span class="n">get_myid</span>
<span class="k">common</span> <span class="o">/</span><span class="n">mean2_host_hm</span><span class="o">/</span> <span class="n">how_many</span>
<span class="k">common</span> <span class="o">/</span><span class="n">mean2_host_lp</span><span class="o">/</span> <span class="n">left_part</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_iys</span><span class="o">/</span> <span class="n">iys</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_iyl</span><span class="o">/</span> <span class="n">iyl</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_ixs</span><span class="o">/</span> <span class="n">ixs</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_ixl</span><span class="o">/</span> <span class="n">ixl</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_ae</span><span class="o">/</span> <span class="n">ae</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_asi</span><span class="o">/</span> <span class="n">asi</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_awf</span><span class="o">/</span> <span class="n">awf</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_awk</span><span class="o">/</span> <span class="n">awk</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_ark</span><span class="o">/</span> <span class="n">ark</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_nsp</span><span class="o">/</span> <span class="n">nsp</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_wf</span><span class="o">/</span> <span class="n">wf</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_e</span><span class="o">/</span> <span class="n">e</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_dwk</span><span class="o">/</span> <span class="n">dwk</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_wk</span><span class="o">/</span> <span class="n">wk</span>
<span class="k">common</span> <span class="o">/</span><span class="n">sh_wkh</span><span class="o">/</span> <span class="n">wkh</span>
<span class="n">fak</span> <span class="o">=</span> <span class="n">get_myid</span><span class="p">(</span><span class="n">my_id</span><span class="p">)</span>
<span class="n">how_many_slave</span><span class="o">=</span><span class="n">how_many</span>
<span class="n">left_part_slave</span><span class="o">=</span><span class="n">left_part</span>
<span class="n">iys_slave</span><span class="o">=</span><span class="n">iys</span>
<span class="n">ixs_slave</span><span class="o">=</span><span class="n">ixs</span>
<span class="n">iyl_slave</span><span class="o">=</span><span class="n">iyl</span>
<span class="n">ixl_slave</span><span class="o">=</span><span class="n">ixl</span>
<span class="n">reply</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">call </span><span class="n">athread_get</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">dwk</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="n">dwk_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="mi">31</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">)</span>
<span class="k">call </span><span class="n">athread_get</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">wk</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="n">wk_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="mi">31</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">)</span>
<span class="k">call </span><span class="n">athread_get</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">wkh</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="n">wkh_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="mi">31</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">)</span>
<span class="k">do while</span><span class="p">(</span><span class="n">reply</span> <span class="p">.</span><span class="n">ne</span><span class="p">.</span> <span class="mi">3</span><span class="p">)</span>
<span class="k">end do</span>
<span class="k"> do </span><span class="mi">9999</span> <span class="n">big_loop</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> <span class="n">how_many_slave</span> <span class="o">+</span> <span class="mi">1</span>
<span class="k">if</span> <span class="p">(</span><span class="n">big_loop</span> <span class="p">.</span><span class="n">ne</span><span class="p">.</span> <span class="n">how_many_slave</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="k">then</span>
<span class="k"> </span><span class="n">steps</span> <span class="o">=</span> <span class="n">max_cpe_size</span>
<span class="n">ic</span> <span class="o">=</span> <span class="p">(((</span><span class="n">big_loop</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="p">)</span> <span class="o">+</span> <span class="p">(</span><span class="n">my_id</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">max_cpe_size</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="n">ixl_slave</span> <span class="o">-</span> <span class="n">ixs_slave</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">ia</span> <span class="o">=</span> <span class="p">(((</span><span class="n">big_loop</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="p">)</span> <span class="o">+</span> <span class="p">(</span><span class="n">my_id</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">max_cpe_size</span><span class="p">)</span> <span class="o">-</span> <span class="n">ic</span> <span class="o">*</span> <span class="p">(</span><span class="n">ixl_slave</span> <span class="o">-</span> <span class="n">ixs_slave</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">ic</span> <span class="o">=</span> <span class="n">ic</span> <span class="o">+</span> <span class="mi">1</span>
<span class="n">ia</span> <span class="o">=</span> <span class="n">ia</span> <span class="o">+</span> <span class="mi">1</span>
<span class="k">else</span>
<span class="k"> </span><span class="n">step_base</span> <span class="o">=</span> <span class="n">left_part</span> <span class="o">/</span> <span class="mi">64</span> <span class="c">!34</span>
<span class="n">step_left</span> <span class="o">=</span> <span class="n">left_part</span> <span class="o">-</span> <span class="n">step_base</span> <span class="o">*</span> <span class="mi">64</span> <span class="c">!56</span>
<span class="k">if</span> <span class="p">(</span><span class="n">my_id</span> <span class="p">.</span><span class="n">le</span><span class="p">.</span> <span class="n">step_left</span><span class="p">)</span> <span class="k">then</span>
<span class="k"> </span><span class="n">steps</span> <span class="o">=</span> <span class="n">step_base</span> <span class="o">+</span> <span class="mi">1</span>
<span class="n">ic</span> <span class="o">=</span> <span class="p">((</span><span class="n">how_many_slave</span> <span class="o">*</span> <span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="p">)</span> <span class="o">+</span> <span class="p">(</span><span class="n">my_id</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">steps</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="n">ixl_slave</span> <span class="o">-</span> <span class="n">ixs_slave</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">ia</span> <span class="o">=</span> <span class="p">((</span><span class="n">how_many_slave</span> <span class="o">*</span> <span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="p">)</span> <span class="o">+</span> <span class="p">(</span><span class="n">my_id</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">steps</span><span class="p">)</span> <span class="o">-</span> <span class="n">ic</span> <span class="o">*</span> <span class="p">(</span><span class="n">ixl_slave</span> <span class="o">-</span> <span class="n">ixs_slave</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
<span class="k">else</span>
<span class="k"> </span><span class="n">steps</span> <span class="o">=</span> <span class="n">step_base</span>
<span class="n">ic</span> <span class="o">=</span> <span class="p">((</span><span class="n">how_many_slave</span> <span class="o">*</span> <span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="p">)</span> <span class="o">+</span> <span class="p">(</span><span class="n">step_left</span><span class="p">)</span> <span class="o">*</span> <span class="p">(</span><span class="n">step_base</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">+</span> <span class="p">(</span><span class="n">my_id</span> <span class="o">-</span> <span class="n">step_left</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">steps</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="n">ixl_slave</span> <span class="o">-</span> <span class="n">ixs_slave</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">ia</span> <span class="o">=</span> <span class="p">((</span><span class="n">how_many_slave</span> <span class="o">*</span> <span class="n">max_cpe_size</span> <span class="o">*</span> <span class="mi">64</span><span class="p">)</span> <span class="o">+</span> <span class="p">(</span><span class="n">step_left</span><span class="p">)</span> <span class="o">*</span> <span class="p">(</span><span class="n">step_base</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">+</span> <span class="p">(</span><span class="n">my_id</span> <span class="o">-</span> <span class="n">step_left</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">steps</span><span class="p">)</span> <span class="o">-</span> <span class="n">ic</span> <span class="o">*</span> <span class="p">(</span><span class="n">ixl_slave</span> <span class="o">-</span> <span class="n">ixs_slave</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">endif</span>
<span class="n">ic</span> <span class="o">=</span> <span class="n">ic</span> <span class="o">+</span> <span class="mi">1</span>
<span class="n">ia</span> <span class="o">=</span> <span class="n">ia</span> <span class="o">+</span> <span class="mi">1</span>
<span class="n">endif</span>
<span class="n">reply</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">call </span><span class="n">athread_get</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">ae</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span> <span class="n">ic</span><span class="p">),</span> <span class="n">ae_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="n">steps</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">)</span>
<span class="k">call </span><span class="n">athread_get</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">asi</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span> <span class="n">ic</span><span class="p">),</span> <span class="n">asi_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="n">steps</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">)</span>
<span class="k">call </span><span class="n">athread_get</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">awf</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span> <span class="n">ic</span><span class="p">),</span> <span class="n">awf_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="n">steps</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">)</span>
<span class="k">call </span><span class="n">athread_get</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">awk</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span> <span class="n">ic</span><span class="p">),</span> <span class="n">awk_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="n">steps</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">)</span>
<span class="k">call </span><span class="n">athread_get</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">ark</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span> <span class="n">ic</span><span class="p">),</span> <span class="n">ark_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="n">steps</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">)</span>
<span class="k">call </span><span class="n">athread_get</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">nsp</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span> <span class="n">ic</span><span class="p">),</span> <span class="n">nsp_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="n">steps</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">)</span>
<span class="k">call </span><span class="n">athread_get</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">wf</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">ia</span><span class="p">,</span> <span class="n">ic</span><span class="p">),</span> <span class="n">wf_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="mi">31</span> <span class="o">*</span> <span class="n">steps</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">)</span>
<span class="k">call </span><span class="n">athread_get</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">e</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">ia</span><span class="p">,</span> <span class="n">ic</span><span class="p">),</span> <span class="n">e_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="mi">25</span> <span class="o">*</span> <span class="mi">12</span> <span class="o">*</span> <span class="n">steps</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
<span class="k">do while</span><span class="p">(</span><span class="n">reply</span> <span class="p">.</span><span class="n">ne</span><span class="p">.</span> <span class="mi">8</span><span class="p">)</span>
<span class="k">end do</span>
<span class="k"> </span><span class="n">ae_slave</span> <span class="o">=</span> <span class="mf">0.</span>
<span class="n">asi_slave</span> <span class="o">=</span> <span class="mf">0.</span>
<span class="n">awf_slave</span> <span class="o">=</span> <span class="mf">0.</span>
<span class="n">awk_slave</span> <span class="o">=</span> <span class="mf">0.</span>
<span class="n">ark_slave</span> <span class="o">=</span> <span class="mf">0.</span>
<span class="k">do </span><span class="mi">99999</span> <span class="nb">count</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> <span class="n">steps</span>
<span class="k">if</span> <span class="p">(</span><span class="n">nsp_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span> <span class="p">.</span><span class="n">ne</span><span class="p">.</span> <span class="mi">1</span><span class="p">)</span> <span class="k">cycle</span>
<span class="k"> do </span><span class="mi">200</span> <span class="n">k</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> <span class="n">kld</span>
<span class="n">k1</span> <span class="o">=</span> <span class="n">k</span> <span class="o">+</span> <span class="mi">1</span>
<span class="n">i</span> <span class="o">=</span> <span class="n">k</span> <span class="o">-</span> <span class="n">kl</span> <span class="o">+</span> <span class="mi">1</span>
<span class="n">i1</span> <span class="o">=</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span>
<span class="n">dwkk</span> <span class="o">=</span> <span class="n">dwk_slave</span><span class="p">(</span><span class="n">k</span><span class="p">)</span>
<span class="n">wfk</span> <span class="o">=</span> <span class="n">wf_slave</span><span class="p">((</span><span class="nb">count</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">31</span> <span class="o">+</span> <span class="n">k</span><span class="p">)</span> <span class="c">!(k, ia, ic)</span>
<span class="n">wfk1</span> <span class="o">=</span> <span class="n">wf_slave</span><span class="p">((</span><span class="nb">count</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">31</span> <span class="o">+</span> <span class="n">k1</span><span class="p">)</span> <span class="c">!(k1, ia, ic)</span>
<span class="n">wsk</span> <span class="o">=</span> <span class="n">zpi</span> <span class="o">*</span> <span class="n">wfk</span>
<span class="n">wsk1</span> <span class="o">=</span> <span class="n">zpi</span> <span class="o">*</span> <span class="n">wfk1</span>
<span class="n">wkk</span> <span class="o">=</span> <span class="n">wk_slave</span><span class="p">(</span><span class="n">k</span><span class="p">)</span>
<span class="n">wkk1</span> <span class="o">=</span> <span class="n">wk_slave</span><span class="p">(</span><span class="n">k1</span><span class="p">)</span>
<span class="k">do </span><span class="mi">200</span> <span class="n">j</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> <span class="n">jl</span>
<span class="k">if</span> <span class="p">(</span><span class="n">k</span> <span class="p">.</span><span class="n">lt</span><span class="p">.</span> <span class="n">kl</span><span class="p">)</span> <span class="k">then</span>
<span class="k"> </span><span class="n">ekj</span> <span class="o">=</span> <span class="n">e_slave</span><span class="p">((</span><span class="nb">count</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">25</span> <span class="o">*</span> <span class="mi">12</span> <span class="o">+</span> <span class="p">(</span><span class="n">j</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">kl</span> <span class="o">+</span> <span class="n">k</span><span class="p">)</span> <span class="c">!(k, j, ia, ic)</span>
<span class="n">ekj1</span> <span class="o">=</span> <span class="n">e_slave</span><span class="p">((</span><span class="nb">count</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">25</span> <span class="o">*</span> <span class="mi">12</span> <span class="o">+</span> <span class="p">(</span><span class="n">j</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">kl</span> <span class="o">+</span> <span class="n">k1</span><span class="p">)</span> <span class="c">!(k1, j, ia, ic)</span>
<span class="k">else</span>
<span class="k"> </span><span class="n">ekj</span> <span class="o">=</span> <span class="n">e_slave</span><span class="p">((</span><span class="nb">count</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">25</span> <span class="o">*</span> <span class="mi">12</span> <span class="o">+</span> <span class="p">(</span><span class="n">j</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">kl</span> <span class="o">+</span> <span class="n">kl</span><span class="p">)</span> <span class="o">*</span> <span class="n">wkh_slave</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="c">!(kl, j, ia, ic)</span>
<span class="n">ekj1</span> <span class="o">=</span> <span class="n">e_slave</span><span class="p">((</span><span class="nb">count</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">25</span> <span class="o">*</span> <span class="mi">12</span> <span class="o">+</span> <span class="p">(</span><span class="n">j</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">kl</span> <span class="o">+</span> <span class="n">kl</span><span class="p">)</span> <span class="o">*</span> <span class="n">wkh_slave</span><span class="p">(</span><span class="n">i1</span><span class="p">)</span> <span class="c">!(kl, j, ia, ic)</span>
<span class="n">endif</span>
<span class="n">ae_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">=</span><span class="n">ae_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">+</span><span class="p">(</span><span class="n">ekj</span><span class="o">+</span><span class="n">ekj1</span><span class="p">)</span><span class="o">*</span><span class="n">dwkk</span>
<span class="n">awf_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">=</span><span class="n">awf_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">+</span><span class="p">(</span><span class="n">ekj</span><span class="o">*</span><span class="n">wfk</span><span class="o">+</span><span class="n">ekj1</span><span class="o">*</span><span class="n">wfk1</span><span class="p">)</span><span class="o">*</span><span class="n">dwkk</span>
<span class="n">asi_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">=</span><span class="n">asi_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">+</span><span class="p">(</span><span class="n">ekj</span><span class="o">/</span><span class="n">wsk</span><span class="o">+</span><span class="n">ekj1</span><span class="o">/</span><span class="n">wsk1</span><span class="p">)</span><span class="o">*</span><span class="n">dwkk</span>
<span class="n">awk_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">=</span><span class="n">awk_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">+</span><span class="p">(</span><span class="n">ekj</span><span class="o">*</span><span class="n">wkk</span><span class="o">+</span><span class="n">ekj1</span><span class="o">*</span><span class="n">wkk1</span><span class="p">)</span><span class="o">*</span><span class="n">dwkk</span>
<span class="n">ark_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">=</span><span class="n">ark_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">+</span><span class="p">(</span><span class="n">ekj</span><span class="o">/</span><span class="nb">sqrt</span><span class="p">(</span><span class="n">wkk</span><span class="p">)</span><span class="o">+</span><span class="n">ekj1</span><span class="o">/</span><span class="nb">sqrt</span><span class="p">(</span><span class="n">wkk1</span><span class="p">))</span><span class="o">*</span><span class="n">dwkk</span>
<span class="mi">200</span> <span class="k">continue</span>
<span class="k"> </span><span class="n">asi_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">=</span><span class="n">ae_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">/</span><span class="n">asi_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span>
<span class="n">awf_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">=</span><span class="n">awf_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">/</span><span class="n">ae_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span>
<span class="n">awk_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">=</span><span class="n">awk_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">/</span><span class="n">ae_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span>
<span class="n">ark_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">=</span><span class="p">(</span><span class="n">ae_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">)</span><span class="o">/</span><span class="n">ark_slave</span><span class="p">(</span><span class="nb">count</span><span class="p">))</span><span class="o">**</span><span class="mi">2</span>
<span class="mi">99999</span> <span class="k">continue</span>
<span class="k"> </span><span class="n">reply</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">call </span><span class="n">athread_put</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">ae_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="n">ae</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span> <span class="n">ic</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="n">steps</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
<span class="k">call </span><span class="n">athread_put</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">asi_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="n">asi</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span> <span class="n">ic</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="n">steps</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
<span class="k">call </span><span class="n">athread_put</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">awf_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="n">awf</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span> <span class="n">ic</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="n">steps</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
<span class="k">call </span><span class="n">athread_put</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">awk_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="n">awk</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span> <span class="n">ic</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="n">steps</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
<span class="k">call </span><span class="n">athread_put</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">ark_slave</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="n">ark</span><span class="p">(</span><span class="n">ia</span><span class="p">,</span> <span class="n">ic</span><span class="p">),</span> <span class="mi">4</span> <span class="o">*</span> <span class="n">steps</span><span class="p">,</span> <span class="n">reply</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
<span class="k">do while</span><span class="p">(</span><span class="n">reply</span> <span class="p">.</span><span class="n">ne</span><span class="p">.</span> <span class="mi">5</span><span class="p">)</span>
<span class="k">end do</span>
<span class="k"> </span><span class="mi">9999</span> <span class="k">continue</span>
<span class="c">!==========================================================================================================================</span>
<span class="k">end subroutine </span><span class="n">mean2_slave</span>
</pre></div>
<h2>结语</h2>
<p>经过这次比赛,我认识到了自己和顶尖选手的差距。不过差距还没有到天壤之别的地步,希望明年能够提前多做准备,不要让华科和自己再这样丢脸。加油!</p>
</article>
<!-- Tag list for this post: each link points to that tag's page under /tag/. -->
<div class="tags">
<p>tags: <a href="/tag/contest.html">contest</a>, <a href="/tag/parallelization.html">parallelization</a></p>
</div>
<hr>
</div>
</div>
</div>
<hr>
<!-- Footer -->
<footer>
  <!-- Site footer: icon links (GitHub, e-mail) followed by the "powered by" credit line. -->
  <div class="container">
    <div class="row">
      <div class="col-lg-8 col-lg-offset-2 col-md-10 col-md-offset-1">
        <ul class="list-inline text-center">
          <li>
            <!-- Icon-only link: aria-label supplies the accessible name; the icon stack is decorative. -->
            <a href="https://github.com/HUSTMeituanClub" aria-label="HUSTMeituanClub on GitHub">
              <span class="fa-stack fa-lg" aria-hidden="true">
                <i class="fa fa-circle fa-stack-2x"></i>
                <i class="fa fa-github fa-stack-1x fa-inverse"></i>
              </span>
            </a>
          </li>
          <li>
            <!-- NOTE(review): the mailto address below has no local part before "@", so it is not a usable address. The href is left unchanged; fill in the real mailbox. -->
            <a href="mailto:@hustmeituan.club" aria-label="Email">
              <span class="fa-stack fa-lg" aria-hidden="true">
                <i class="fa fa-circle fa-stack-2x"></i>
                <i class="fa fa-envelope fa-stack-1x fa-inverse"></i>
              </span>
            </a>
          </li>
        </ul>
        <p class="copyright text-muted">
          Blog powered by <a href="https://getpelican.com">Pelican</a>,
          which takes great advantage of <a href="https://python.org">Python</a>.
        </p>
      </div>
    </div>
  </div>
</footer>
<!-- jQuery: listed first; presumably the two scripts below depend on it (TODO confirm before reordering or adding async) -->
<script src="/theme/js/jquery.min.js"></script>
<!-- Bootstrap core JavaScript (theme copy served from /theme/js/) -->
<script src="/theme/js/bootstrap.min.js"></script>
<!-- Custom theme JavaScript (clean-blog), loaded last -->
<script src="/theme/js/clean-blog.min.js"></script>
</body>
</html>