#!/usr/bin/python

# Two long strings defined first

param_template = """


DATATYPE 0				# The type of data to be read in.
					# 0 = individual data in the file
					# specified by INDFILE, 1 = population
					# data in the file specified by
					# POPFILE.

INDFILE %(INDFILENAME)s.indfile   		# The name of the individual datafile.
					# Required if DATATYPE = 0.

OUTFILE %(INDFILENAME)s.outfile     		# The average cluster membership 
					# coefficients across the permuted runs
					# are printed here.

MISCFILE %(INDFILENAME)s.miscfile


K %(KVAL)s      				# Number of clusters.s

C %(POPVAL)s	 	      	        # Number of individuals or populations.

R %(RUNVAL)s		            	# Number of runs.

M %(METHODVAL)s				# Method to be used (1 = FullSearch,
					# 2 = Greedy, 3 = LargeKGreedy).

W 0					# Weight by the number of individuals
					# in each population as specified in 
					# the datafile (1 if yes, 0 if no).

S 1                                     # Pairwise matrix similarity statistic 
                                        # to be used. 1 = G, 2 = G'. 



GREEDY_OPTION %(GREEDY_OPTIONVAL)s		# 1 = All possible input orders,
					# 2 = random input orders,
					# 3 = pre-specified input orders.

REPEATS %(REPEATSVAL)s				# If GREEDY_OPTION = 2, then REPEATS
					# determines the number of random input
					# orders to be tested. If GREEDY_OPTION
					# = 3, then REPEATS is the number of 
					# input orders in PERMUTATIONFILE.  

PERMUTATIONFILE %(INDFILENAME)s.permutationfile	# The permutations of the runs in 
					# PERMUTATIONFILE will be used, if 
					# GREEDY_OPTION = 3.




PRINT_PERMUTED_DATA 1			# Print the permuted data (clusters) in
					# INDFILE or POPFILE to 
					# PERMUTED_DATAFILE (0 = don't print,
					# 1 = print into one file, 2 = print 
					# into separate files for each run).

PERMUTED_DATAFILE %(INDFILENAME)s.perm_datafile	# The permuted data (clusters) will be 
					# printed to this file (if 
					# PRINT_PERMUTED_DATA = 2, several 
					# files with the extensions "_1" to 
					# "_R" will be created).

PRINT_EVERY_PERM 0			# Print every tested permutation of the
					# runs and the corresponding value of 
					# SSC to a file specified by 
					# EVERY_PERMFILE (0 = don't print, 
					# 1 = print). 
					# Note that printing may result in a 
					# very large file.

EVERY_PERMFILE %(INDFILENAME)s.every_permfile	# Every tested permutation of the runs
					# and the corresponding SSC will be
					# printed here. 

PRINT_RANDOM_INPUTORDER 0               # Print random input orders of runs to
                                        # RANDOM_INPUTORDER (0 = don't print,
                                        # 1 = print). This option is only 
                                        # available if GREEDY_OPTION = 2.

RANDOM_INPUTORDERFILE %(INDFILENAME)s.random_inputorderfile # Every random input order 
                                        # of the runs (generated by CLUMPP if 
                                        # GREEDY_OPTION = 2) will be printed
                                        # here. 



OVERRIDE_WARNINGS 0			# This option allows the user to 
					# override non-crucial warnings from 
					# the program (0 allow warnings, 1 do 
					# not issue non-crucial warnings).

ORDER_BY_RUN 1				# Permute the clusters of the output 
					# files by the specified run. (0 to 
					# not specify a run, 1 to R specifies
					# a run in the INDFILE or POPFILE).   

"""




drawparam_template = """

PARAMETERS FOR THE PROGRAM distruct.  YOU WILL NEED TO SET THESE
IN ORDER TO RUN THE PROGRAM.  

"(int)" means that this takes an integer value.
"(B)"   means that this variable is Boolean 
        (1 for True, and 0 for False)
"(str)" means that this is a string (but not enclosed in quotes) 
"(d)"   means that this is a double (a real number).

Data settings

#define INFILE_POPQ        %(INDFILENAME)s.popfile  
#define INFILE_INDIVQ      %(INDFILENAME)s.outfile  
#define INFILE_LABEL_BELOW %(POPLABEL)s
#define INFILE_LABEL_ATOP  None
#define INFILE_CLUST_PERM  %(INDFILENAME)s.perm     // (str) input file of permutation of clusters to print  

#define OUTFILE            %(OUTPSF)s       //(str) name of output file

#define K	%(KVAL)s    
#define NUMPOPS %(NUMPOP)s  
#define NUMINDS %(POPVAL)s

Main usage options

#define PRINT_INDIVS      1  // (B) 1 if indiv q's are to be printed, 0 if only population q's
#define PRINT_LABEL_ATOP  %(PRINT_LABEL_ATOP)s  // (B) print labels above figure
#define PRINT_LABEL_BELOW 1  // (B) print labels below figure
#define PRINT_SEP         1  // (B) print lines to separate populations

Figure appearance

#define FONTHEIGHT 7	// (d) size of font
#define DIST_ABOVE 3	// (d) distance above plot to place text
#define DIST_BELOW -3	// (d) distance below plot to place text
#define BOXHEIGHT  %(BOXHEIGHT)s	// (d) height of the figure
#define INDIVWIDTH %(INDIVWIDTH)s	// (d) width of an individual


Extra options

#define ORIENTATION 0		     // (int) 0 for horizontal orientation (default)
			     //       1 for vertical orientation
			     //	      2 for reverse horizontal orientation
                             //       3 for reverse vertical orientation
#define XORIGIN %(XORIGIN)s	// (d) lower-left x-coordinate of figure
#define YORIGIN %(YORIGIN)s	// (d) lower-left y-coordinate of figure
#define XSCALE 1		// (d) scale for x direction
#define YSCALE 1		// (d) scale for y direction
#define ANGLE_LABEL_ATOP 90	// (d) angle for labels atop figure (in [0,180])
#define ANGLE_LABEL_BELOW 90    // (d) angle for labels below figure (in [0,180])
#define LINEWIDTH_RIM  1	// (d) width of "pen" for rim of box
#define LINEWIDTH_SEP 0.3	// (d) width of "pen" for separators between pops and for tics
#define LINEWIDTH_IND 4	// (d) width of "pen" used for individuals 
#define GRAYSCALE 0	        // (B) use grayscale instead of colors
#define ECHO_DATA 1             // (B) print some of the data to the screen
#define REPRINT_DATA 1          // (B) print the data as a comment in the ps file
#define PRINT_INFILE_NAME 0     // (B) print the name of INFILE_POPQ above the figure 
                                //     this option is meant for use only with ORIENTATION=0 
#define PRINT_COLOR_BREWER 0    // (B) print ColorBrewer settings in the output file 
                                //     this option adds 1689 lines and 104656 bytes to the output
                                //     and is required if using ColorBrewer colors


Command line options:

-d drawparams
-K K
-M NUMPOPS
-N NUMINDS
-p input file (population q's)
-i input file (individual q's)
-a input file (labels atop figure)
-b input file (labels below figure)
-c input file (cluster permutation)
-o output file







"""




import argparse
import glob
import re
import os
import sys


#---------------- Globals

num2pop ={}


#---------------------------------

def parseArguments():
    parser = argparse.ArgumentParser(description='create clumpp paramfile.')
    parser.add_argument('base', metavar='S', type=str, 
                   help='base file name')
    parser.add_argument('-K', dest='K', action='store',required=True,
                   help='K -- number of clusters ')
    parser.add_argument('--glob', dest='glob', action='store',
                   default = "./", 
                   help='glob ')
    parser.add_argument('--popfname', dest='popfname', action='store',
                   default="", 
                   help='name of pop file in phe format ')
    parser.add_argument('--popcol', dest='popcol', action='store',
                   default = 2, type=int, 
                   help='column in popfile (number from 0) default is 2 ')

    parser.add_argument("--popdescr",dest="popdescr",action='store',
                        default="",
                        help=" (output) file name of num pop")
    parser.add_argument('--output', dest='output', action='store',
                   default = "", 
                   help='output name (default is basename) ')

    parser.add_argument('--par_clumpp', dest='parclumpp', action='store_true',
                   default = False, 
                   help=' Produce clumpp param file')


    parser.add_argument('--order_pops', dest='orderpops', action='store',
                   default = "", 
                   help=' Order of populations in distruct output')



    parser.add_argument('--par_distruct', dest='pardistruct', action='store_true',
                   default = False, 
                   help=' Produce distruct param file')


    parser.add_argument('--clumpp_paramfile', dest='clumpp_paramfile', action='store',
                   default = "paramfile", 
                   help='name of clumpp parameter file (output default is clumpp_paramfile) ')


    parser.add_argument('--distruct_paramfile', dest='distruct_paramfile',
                   action='store', default = "drawparams",
                   help='name of distruct parameter file (distruct param: default is dfawparamfile) ')




    parser.add_argument('--outputps', dest="outputps", action='store',
                   default = "output.ps",
                   help='name of output postscript file')


    parser.add_argument('--doclumpp', dest="doclumpp", action='store_true',
                   default = False,
                   help='run clumpp')

    parser.add_argument('--guesspops', dest="guesspops", action='store_true',
                   default = False,
                   help='guess matching between pops')



    parser.add_argument('--dodistruct', dest="dodistruct", action='store_true',
                   default = False,
                   help='run distruct')

    args = parser.parse_args()
    return args



def getFamPop(base,popfname,popcol,popdescr,orderpops):
    indname =  []
    with open("%s.fam"%base) as bf:
        for line in bf:
            data = line.split()
            indname.append("%s:%s"%(data[0],data[1]))
    ind2pop = {}
    pop2num = {}
    # read the fam file to get individuals name
    popcode = 1
    popname =  []
    if popfname:
       print "here <%s>"%popfname
       with open(popfname) as pf:
           for line in pf: 
               data = line.split()
               key = "%s:%s"%(data[0],data[1])
               pop = data[popcol]
               if pop not in pop2num:
                   pop2num[pop]=popcode
                   num2pop[str(popcode)]=pop
                   popname.append(pop)
                   popcode = popcode+1
               ind2pop[key]= pop2num[pop]
       with open(popdescr,"w") as pf:
          for pop in orderpops:
             pf.write("%d %s\n"%(pop2num[pop],pop))
          for pop in pop2num.keys():
              if pop not in orderpops:
                 pf.write("%d %s\n"%(pop2num[pop],pop))
    return (indname, ind2pop,popcode,pop2num)


def makeIndivids(pattern,base,K,output):
    qpattern = "%s%s.%s.Q"%(pattern,base,K)
    inputMats = glob.glob(qpattern)
    if len(inputMats)==0:
        sys.exit("The glob pattern <%s> does not match any files"%qpattern)
    used_pops = []
    indf = open("%s.indfile"%output,"w")
    for fname in inputMats:
        f= open(fname)
        ind = 0
        for line in f:
            individ = indname[ind]
            pop = ind2pop.get(individ,ind)
            if pop==0: pop=1
            if pop not in used_pops:
                used_pops.append(pop)
            indf.write("%d %d (%d) %d : %s"%\
                      (ind+1,ind+1,ind+1,pop,line))
            ind = ind+1
        f.close()
    indf.close()
    return (len(inputMats), used_pops, ind)


def outputTemplate(paramfile,template,parms):
    wf = open(paramfile,"w")
    wf.write(template%parms)
    wf.close()



def popQProduce(output,K):
    K = int(K)
    indf = open("%s.outfile"%output)
    numinpop = [0]*numpops
    p = [[0]*K for i in range(numpops)]
    for line in indf:
        data = re.findall("[\.\w]+",line)
        ind = int(data[0])-1
        probs = map(float,data[-int(K):])
        currpop = ind2pop[indname[ind]]
        numinpop[currpop] += 1
        for i in range(K):
            p[currpop][i] += probs[i]
    for i in range(numpops):
        for j in range(K):
            if numinpop[i] > 0:
                p[i][j] = p[i][j]/numinpop[i]
    indf.close()
    qf = open("%s.popfile"%output,"w")
    for i in used_pops:
        if numinpop[i]==0: continue
        qf.write("%d:"%(i))
        for j in range(K):
          qf.write(" %f"%p[i][j])
        qf.write(" %d\n"%numinpop[i])
    qf.close()



def guessPops(output):
    qf = open("%s.popfile"%output,"r")
    for line in qf:
        data = re.findall("([\w.]+)",line)
        p = data[0]
        data[0]=-1
        qs = map(float,data[0:-1])
        big1=0
        big2=0
        for (i,q) in enumerate(qs):
            if q>qs[big1]:
                big2=big1
                big1=i
            elif q>qs[big2]:
                big2=i
        if big1 > 50 and qs[big1]-qs[big2]>0.05:
            print "Group %s (%d) is population %d\n"%(pop2num[p],p,big1)
        else:
            print "Group %s (%s) fingerprint %d [%f],  %d[%f]\n"%\
                   (num2pop[p],p,big1,qs[big1],big2,qs[big2])

    qf.close()


    
args = parseArguments()


if not args.popdescr:
    args.popdescr="%s.names"%args.base

# The order of populations -- important for output picture only
if args.orderpops:
    orderpops = args.orderpops.split(",")
else:
    orderpops = ""



(indname,ind2pop, numpops, pop2num) = \
    getFamPop(args.base,args.popfname,args.popcol,args.popdescr,orderpops)

# indname -- indname[10] is the name of the 10th person in the file
# ind2pop -- given alpha key says which population of individ
# numpops -- how many pops in popfile
# used_pops -- actual populations 
# pop2num -- given population gives a population code

if not args.output: args.output=args.base


(numruns,used_pops, numinds) = makeIndivids(args.glob,args.base,args.K,args.output)



params = { 'INDFILENAME': "%s"%args.output,\
           'KVAL' : args.K,\
           'POPVAL' : numinds,\
           'POPLABEL' : args.popdescr,\
           'OUTPSF'  :  args.outputps,\
           'NUMPOP'  : len(used_pops),
           'RUNVAL' : numruns,\
           'METHODVAL' : "2",\
           'GREEDY_OPTIONVAL' : "2",
           'PRINT_LABEL_ATOP' : "0",
           'REPEATSVAL' : "10",\
           'XORIGIN'    : "30",\
           'YORIGIN'    :  "600",
           'BOXHEIGHT'  :  "25",
           'INDIVWIDTH' :  "1.4"}



if args.guesspops:
    guessPops(args.output)
    sys.exit(0)

if args.parclumpp:
    outputTemplate(args.clumpp_paramfile,param_template,params)


if args.doclumpp:
    os.system('clumpp %s'%args.clumpp_paramfile)

if args.pardistruct:
    outputTemplate(args.distruct_paramfile,drawparam_template,params)
    popQProduce(args.output,args.K)


if args.dodistruct:
    y1 = int(params['YORIGIN'])-50
    y2 = y1+int(params['BOXHEIGHT'])*2+50+7
    x1 = 0
    x2 = int(float(params['INDIVWIDTH'])*numinds+2*int(params['XORIGIN']))
    bbox = "%%%%BoundingBox: %d %d %d %d"%(x1,y1,x2,y2)
    os.system('distruct -d %s'%args.distruct_paramfile)
    sedcmd = 'sed -i -e "s/%% Command line arguments: distruct.*/%s/"'%bbox
    os.system(sedcmd+" %s"%args.outputps)
    os.system("epstopdf %s"%args.outputps)

