IRPF90 1.2.3 -> 1.2.4: inlining option, sorted profiler report with error bars

Per-file summary

  src/command_line.py
      Adds option -n / --inline, which takes an argument (help text:
      all|providers|builders), and a cached `inline` property that returns
      the argument, or "" when the option was not given.

  src/makefile.py, src/profile.py (build_rdtsc)
      irp_rdtsc.o is now compiled to, and listed in OBJ1 as, "irp_rdtsc.o"
      without a directory prefix.

  src/preprocessed_text.py
      get_type also rewrites "end type" and "end module" to "endtype" and
      "endmodule" before classifying a line.

  src/profile.py (Fortran template in build_module)
      irp_profile gains a third row: row 1 accumulates cycles, row 2
      accumulates squared cycles, row 3 counts calls.
      profile_sort insertion-sorts irp_profile_label in place and records
      the permutation in irp_order.
      irp_init_timer estimates the cost of one irp_rdtsc() call as the mean
      of 1000 back-to-back measurements (irp_rdtsc_shift); irp_set_timer
      subtracts it from every sample.
      irp_print_timer measures the counter frequency across a one-second
      sleep and prints, per label, calls, total and average cycles and
      seconds, each average with a "+/-" error bar.
      Labels are now keyed to vi.same_as instead of vi.name.

  src/regexps.py
      "type" is added to the alternatives recognised by re_decl.

  src/variable.py
      When command_line.directives is true and the inline option is "all" or
      "providers" / "builders", a "!DEC$ ATTRIBUTES FORCEINLINE" line is
      emitted in front of the provide_* / bld_* subroutine.

  src/version.py
      Version string bumped to 1.2.4.

Amendments made while reviewing this patch

  1. variable.py: the builder's FORCEINLINE directive named bld_<same_as>
     while the subroutine emitted on the following line is bld_<name>.  The
     directive now uses `name`, as the provider hunk already does.
  2. profile.py: the error bar divided the variance by N+1.  It now divides
     by N-1 (standard error of the mean; the existing N > 1 guard keeps the
     divisor positive) and clamps the variance at zero so that round-off in
     <x^2> - <x>^2 cannot produce sqrt of a negative number.
  3. profile.py: unused local `jmax` removed from profile_sort.

Line breaks and indentation were restored from a whitespace-collapsed copy
of the patch; spacing inside each line is as found in that copy.  The
`index` lines are retained from the original commit, so the post-image ids
of src/profile.py and src/variable.py do not describe the amended content.

diff --git a/src/command_line.py b/src/command_line.py
index b83bee1..0c0052b 100644
--- a/src/command_line.py
+++ b/src/command_line.py
@@ -46,6 +46,7 @@ options['z'] = [ 'openmp' , 'Automatic openMP tasks (may not work)', 0 ]
 options['l'] = [ 'align' , 'Align arrays using compiler directives', 1 ]
 options['s'] = [ 'substitute' , 'Substitute values for loop max values', 1 ]
 options['r'] = [ 'no_directives', 'Ignore compiler directives !DEC$ and !DIR$', 0 ]
+options['n'] = [ 'inline' , 'all|providers|builders : Force inlining of providers or builders', 1 ]
 
 class CommandLine(object):
 
@@ -64,6 +65,16 @@ class CommandLine(object):
     return self._defined
   defined = property(fget=defined)
 
+  def inline(self):
+    if '_inline' not in self.__dict__:
+      self._inline = ""
+      for o,a in self.opts:
+        if o in [ "-n", '--'+options['n'][0] ]:
+          self._inline = a
+          break
+    return self._inline
+  inline = property(fget=inline)
+
   def substituted(self):
     if '_substituted' not in self.__dict__:
       self._substituted = {}
diff --git a/src/makefile.py b/src/makefile.py
index 96827e8..b48f743 100644
--- a/src/makefile.py
+++ b/src/makefile.py
@@ -89,7 +89,7 @@ def run():
 
   print >>file, "OBJ1 = $(patsubst %%, %s%%,$(notdir $(OBJ))) %sirp_touches.irp.o"%(irpdir,irpdir),
   if command_line.do_profile:
-    print >>file, " %sirp_profile.irp.o"%(irpdir), " %sirp_rdtsc.o"%(irpdir),
+    print >>file, " %sirp_profile.irp.o"%(irpdir), " irp_rdtsc.o",
   if command_line.do_openmp:
     print >>file, " %sirp_locks.irp.o"%(irpdir),
   else:
diff --git a/src/preprocessed_text.py b/src/preprocessed_text.py
index 66dfe4f..defc770 100644
--- a/src/preprocessed_text.py
+++ b/src/preprocessed_text.py
@@ -34,6 +34,8 @@ from util import *
 re_endif = re.compile("end\s+if")
 re_elseif = re.compile("else\s+if")
 re_enddo = re.compile("end\s+do")
+re_endtype= re.compile("end\s+type")
+re_endmodule = re.compile("end\s+module")
 re_endselect = re.compile("end\s+select")
 
 # Local variables
@@ -100,6 +102,8 @@ def get_type (i, filename, line, is_doc):
   # Replacements
   lower_line = re_elseif.sub("elseif",lower_line)
   lower_line = re_enddo.sub("enddo",lower_line)
+  lower_line = re_endtype.sub("endtype",lower_line)
+  lower_line = re_endmodule.sub("endmodule",lower_line)
   lower_line = re_endif.sub("endif",lower_line)
   lower_line = re_endselect.sub("endselect",lower_line)
   for c in """()'"[]""":
diff --git a/src/profile.py b/src/profile.py
index 23b00f8..f1321d6 100644
--- a/src/profile.py
+++ b/src/profile.py
@@ -29,8 +29,7 @@ def build_rdtsc():
   file.write(rdtsc)
   file.close()
   def t():
-    p = subprocess.Popen(["gcc","-O2",filename,"-c","-o","IRPF90_temp/irp_rdtsc.o"])
-
+    p = subprocess.Popen(["gcc","-O2",filename,"-c","-o","irp_rdtsc.o"])
     p.communicate()
     os.remove(filename)
 
@@ -39,14 +38,52 @@ def build_rdtsc():
 def build_module():
   data = """
 module irp_timer
- double precision :: irp_profile(2,%(n)d)
+ double precision :: irp_profile(3,%(n)d)
+ integer :: irp_order(%(n)d)
  character*(64) :: irp_profile_label(%(n)d)
+ double precision :: irp_rdtsc_shift
+
+ contains
+
+ subroutine profile_sort ()
+  implicit none
+  character*(64) :: xtmp
+  integer :: i, i0, j
+
+  do i=1,size(irp_profile_label)
+    irp_order(i)=i
+  enddo
+  do i=1,size(irp_profile_label)
+    xtmp = irp_profile_label(i)
+    i0 = irp_order(i)
+    j = i-1
+    do j=i-1,1,-1
+      if ( irp_profile_label(j) > xtmp ) then
+        irp_profile_label(j+1) = irp_profile_label(j)
+        irp_order(j+1) = irp_order(j)
+      else
+        exit
+      endif
+    enddo
+    irp_profile_label(j+1) = xtmp
+    irp_order(j+1) = i0
+  enddo
+ end subroutine profile_sort
+
 end module
 
 subroutine irp_init_timer
  use irp_timer
  implicit none
- irp_profile = 0.
+ integer :: i
+ double precision :: irp_rdtsc, t0
+ irp_profile = 0.d0
+ irp_rdtsc_shift = 0.d0
+ do i=1,1000
+   t0 = irp_rdtsc()
+   irp_rdtsc_shift = irp_rdtsc_shift + (irp_rdtsc()-t0)
+ enddo
+ irp_rdtsc_shift = 1.d-3*irp_rdtsc_shift
 %(text)s
 end
 
@@ -54,33 +91,55 @@ subroutine irp_set_timer(i,value)
  use irp_timer
  implicit none
  integer, intent(in) :: i
- double precision, intent(in) :: value
+ double precision, intent(inout) :: value
+ value = value - irp_rdtsc_shift
  irp_profile(1,i) = irp_profile(1,i) + value
- irp_profile(2,i) = irp_profile(2,i) + 1.d0
+ irp_profile(2,i) = irp_profile(2,i) + value*value
+ irp_profile(3,i) = irp_profile(3,i) + 1.d0
 end
 
 subroutine irp_print_timer()
  use irp_timer
  implicit none
- integer :: i
- print '(A24,A8,4(X,A14))', 'Calls', 'Tot Cycles', 'Avge Cycles', &
-    'Tot Secs(1GHz)', 'Avge Secs(1GHz)'
+ integer :: i, ii
+ double precision :: error, sigma2, average, average2, frequency, t0
+ double precision :: irp_rdtsc
+ t0 = irp_rdtsc()
+ call sleep(1)
+ frequency = (irp_rdtsc()-t0-irp_rdtsc_shift)
+
+ call profile_sort()
+ print '(A24,A8,A17,A20,A13,A20)', '', 'N.Calls', 'Tot Cycles', 'Avg Cycles', &
+    'Tot Secs', 'Avg Secs'
  print '(A)', '----------------------------------------------'// &
     '----------------------------------------------'
- do i=1,%(n)d
-  if (irp_profile(2,i) > 0.) then
-   print '(A24,F8.0,2(X,F14.0),2(X,F14.8))', &
-     irp_profile_label(i), irp_profile(2,i), &
-     irp_profile(1,i), irp_profile(1,i)/irp_profile(2,i), &
-     irp_profile(1,i)*1.d-9, 1.d-9*irp_profile(1,i)/irp_profile(2,i)
+ do ii=1,%(n)d
+  i = irp_order(ii)
+  if (irp_profile(3,i) > 0.) then
+   error = 0.d0
+   average = irp_profile(1,i)/irp_profile(3,i)
+   if (irp_profile(3,i) > 1.d0) then
+     average2 = irp_profile(2,i)/irp_profile(3,i)
+     sigma2 = (average2 - average*average)
+     error = sqrt(max(sigma2,0.d0)/(irp_profile(3,i)-1.d0))
+   endif
+   print '(A24 , F8.0 , X,F12.0 , X,F12.0,A3,F8.0, X,F12.8, X,F8.5,A3,F8.5 )', &
+     irp_profile_label(ii), &
+     irp_profile(3,i), &
+     irp_profile(1,i), &
+     average, '+/-', error, &
+     irp_profile(1,i)/frequency, &
+     average/frequency, '+/-', error/frequency
   endif
  enddo
+ print *, 'Frequency :', frequency*1.d-9, ' GHz'
+ print *, 'rdtsc latency :', irp_rdtsc_shift, ' cycles'
 end
 """
   label = {}
   for i in variables:
     vi = variables[i]
-    label[vi.label] = vi.name
+    label[vi.label] = vi.same_as
   text = []
   lmax = 0
   for l in label:
diff --git a/src/regexps.py b/src/regexps.py
index 0b19934..3dd1b56 100644
--- a/src/regexps.py
+++ b/src/regexps.py
@@ -46,6 +46,7 @@ re_decl = re.compile( "".join( [ r"^\ *",
  r"|intrinsic *(::)?",
  r"|external *(::)?",
  r"|equivalence *(::)?",
+ r"|type",
  r")[^=(]"
  ] ) )
 
diff --git a/src/variable.py b/src/variable.py
index 591b523..d4dccf2 100644
--- a/src/variable.py
+++ b/src/variable.py
@@ -507,14 +507,17 @@ class Variable(object):
               " endif" ]
           return result
 
-        result = [ "subroutine provide_%s"%(name) ]
+        result = []
+        if command_line.directives and command_line.inline in ["all","providers"]:
+          result += [ "!DEC$ ATTRIBUTES FORCEINLINE :: provide_%s"%(name) ]
+        result += [ "subroutine provide_%s"%(name) ]
         result += build_use( [same_as]+self.to_provide )
         result.append(" implicit none")
         length = len("provide_%s"%(name))
         result += [\
         " character*(%d) :: irp_here = 'provide_%s'"%(length,name),
         " integer :: irp_err ",
-        " logical :: irp_dimensions_OK" ]
+        " logical :: irp_dimensions_OK" ]
         if command_line.do_openmp:
           result.append(" call irp_lock_%s(.True.)"%(same_as))
         if command_line.do_assert or command_line.do_debug:
@@ -570,7 +573,10 @@ class Variable(object):
         text = map(lambda x: x[1], text)
         for line in filter(lambda x: type(x) not in [ Begin_doc, End_doc, Doc], text):
           if type(line) == Begin_provider:
-            result = [ "subroutine bld_%s"%(name) ]
+            result = []
+            if command_line.directives and command_line.inline in ["all","builders"]:
+              result += [ "!DEC$ ATTRIBUTES FORCEINLINE :: bld_%s"%(name) ]
+            result += [ "subroutine bld_%s"%(name) ]
             result += build_use([name]+self.needs)
           elif type(line) == Cont_provider:
             pass
diff --git a/src/version.py b/src/version.py
index 71115e0..cd688d2 100644
--- a/src/version.py
+++ b/src/version.py
@@ -1 +1 @@
-version = "1.2.3"
+version = "1.2.4"