-
Notifications
You must be signed in to change notification settings - Fork 0
/
randomselection.py
65 lines (52 loc) · 2.06 KB
/
randomselection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import fileinput
import random
# Load the genotype table, split its rows by the phenotype column, and draw
# the random training subsets.
#
# Each data row is tab-separated: column 0 is used as the row key and
# column 1 is compared against '0' / '1' to pick the destination list.
f = "P:\\modeling\\new_finalnumericgenotype.dat"

# Number of rows drawn (without replacement) from each phenotype group.
TRAINING_SAMPLE_SIZE = 250

allcaselineDicList = []     # one {column 0: full row} dict per row with column 1 == '1'
allcontrollineDicList = []  # one {column 0: full row} dict per row with column 1 == '0'
head = ""                   # header row; stays "" if no header line is matched

for line in fileinput.input(f):
    row = line.strip()
    if not row:
        # A blank (e.g. trailing) line has no second column; skip it rather
        # than crash with IndexError below.
        continue
    # NOTE(review): data rows are split on tabs, but this prefix contains
    # spaces -- confirm it matches the real header line of the input file.
    if line.startswith("sample phenotype rs3094315"):
        head = row
        continue
    fields = row.split("\t")
    phenotype = fields[1]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(phenotype)
    lineDic = {fields[0]: row}
    # Rows whose second column is neither '0' nor '1' are dropped.
    if phenotype == '0':
        allcontrollineDicList.append(lineDic)
    elif phenotype == '1':
        allcaselineDicList.append(lineDic)

# random.sample raises ValueError if a group holds fewer rows than requested.
traningcaserandomlst = random.sample(allcaselineDicList, TRAINING_SAMPLE_SIZE)
traningcontrolrandomlst = random.sample(allcontrollineDicList, TRAINING_SAMPLE_SIZE)
def getremaninglst(partlst, alllst):
    """Return the elements of `alllst` that do not occur in `partlst`.

    Membership is decided with `in` (equality comparison), so unhashable
    elements such as dicts are supported. The relative order of `alllst`
    is preserved and a new list is returned; neither argument is modified.
    """
    return [item for item in alllst if item not in partlst]
# Test sets: every row of a group that was not drawn into its training sample.
testcaserandomlst = getremaninglst(
    partlst=traningcaserandomlst,
    alllst=allcaselineDicList,
)
testcontrolrandomlst = getremaninglst(
    partlst=traningcontrolrandomlst,
    alllst=allcontrollineDicList,
)

# Output locations, grouped by phenotype list (training / test / all rows).
traningcasefile = "P:\\modeling\\new_traningcasefile.dat"
testcasefile = "P:\\modeling\\new_testcasefile.dat"
allcasefile = "P:\\modeling\\new_allcasefile.dat"

trainingcontrolfile = "P:\\modeling\\new_trainingcontrolfile.dat"
testcontrolfile = "P:\\modeling\\new_testcontrolfile.dat"
allcontrolfile = "P:\\modeling\\new_allcontrolfile.dat"
def writelist2file(lst, filew, h):
    """Write a header line followed by the values of each dict in `lst`.

    Parameters:
        lst:   iterable of dicts whose values are strings.
        filew: path of the output file; it is created or truncated.
        h:     header text, written first and terminated with a newline.

    For every dict, its values are joined with newlines and written followed
    by a newline, so a single-entry dict produces exactly one output line
    and an empty dict produces one blank line.

    Returns None.
    """
    # The with-block guarantees the handle is closed even if a write fails
    # or a dict holds a non-string value that makes join() raise.
    with open(filew, "w") as outfile:
        outfile.write(h + "\n")
        for d in lst:
            outfile.write("\n".join(d.values()) + "\n")
# Emit each row list to its own file, always with the shared header row.
# The tuple order fixes the order in which the files are written.
for rows, destination in (
    (traningcaserandomlst, traningcasefile),
    (testcaserandomlst, testcasefile),
    (traningcontrolrandomlst, trainingcontrolfile),
    (testcontrolrandomlst, testcontrolfile),
    (allcaselineDicList, allcasefile),
    (allcontrollineDicList, allcontrolfile),
):
    writelist2file(rows, destination, head)