Skip to content

Commit

Permalink
Merge pull request #132 from bioinform/xgboost_ntree_limit_to_iteration_range
Browse files Browse the repository at this point in the history

ntree_limit is deprecated in favor of iteration_range in xgboost>=1.4
  • Loading branch information
litaifang authored Feb 4, 2024
2 parents e75ff02 + bb31c4d commit 283b83d
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 11 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
url="https://github.com/bioinform/somaticseq",
packages=find_packages(),
package_data={"": ["*.R"]},
install_requires=["pysam", "numpy", "scipy", "pandas", "xgboost"],
install_requires=["pysam", "numpy", "scipy", "pandas", "xgboost>=1.4"],
scripts=[
"somaticseq/somaticseq_parallel.py",
"somaticseq/run_somaticseq.py",
Expand Down
11 changes: 1 addition & 10 deletions somaticseq/somatic_xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ def param_list_to_dict(param_list, existing_param_dict=DEFAULT_PARAM):


def save_feature_importance_to_file(xgb_model, filename):

feature_gain = xgb_model.get_score(importance_type="gain")
feature_weight = xgb_model.get_score(importance_type="weight")
feature_cover = xgb_model.get_score(importance_type="cover")
Expand All @@ -73,11 +72,9 @@ def save_feature_importance_to_file(xgb_model, filename):
)

with open(filename, "w") as fout:

fout.write(line_i)

for feature_i in sorted(feature_gain):

line_i = "{}\t{}\t{}\t{}\t{}\t{}\n".format(
feature_i,
feature_gain[feature_i],
Expand All @@ -98,7 +95,6 @@ def builder(
num_rounds=DEFAULT_XGB_BOOST_ROUNDS,
model=None,
):

logger = logging.getLogger("xgboost_" + builder.__name__)
logger.info("TRAINING {} for XGBOOST".format(",".join(input_tsvs)))
logger.info("Columns removed before training: {}".format(", ".join(non_feature)))
Expand Down Expand Up @@ -147,7 +143,6 @@ def predictor(
non_feature=NON_FEATURE,
iterations=DEFAULT_NUM_TREES_PREDICT,
):

logger = logging.getLogger("xgboost_" + predictor.__name__)
logger.info("Columns removed for prediction: {}".format(",".join(non_feature)))
logger.info("Number of trees to use = {}".format(iterations))
Expand All @@ -162,14 +157,13 @@ def predictor(
for input_data in pd.read_csv(
input_tsv, sep="\t", chunksize=chunksize, low_memory=False
):

test_data = ntchange.ntchange(input_data)
for non_feature_i in non_feature:
if non_feature_i in test_data:
test_data.drop(non_feature_i, axis=1, inplace=True)

dtest = xgb.DMatrix(test_data)
scores = xgb_model.predict(dtest, ntree_limit=iterations)
scores = xgb_model.predict(dtest, iteration_range=(0, iterations))
predicted = input_data.assign(SCORE=scores)

predicted.to_csv(
Expand All @@ -189,7 +183,6 @@ def predictor(
################################################################################################
# Execute:
if __name__ == "__main__":

parser = argparse.ArgumentParser(
description="Run XGBoost",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
Expand Down Expand Up @@ -274,7 +267,6 @@ def predictor(
args = parser.parse_args()

if args.which == "train":

PARAM = copy(DEFAULT_PARAM)

if args.num_threads:
Expand Down Expand Up @@ -306,7 +298,6 @@ def predictor(
)

elif args.which == "predict":

for feature_i in args.features_excluded:
NON_FEATURE.append(feature_i)

Expand Down

0 comments on commit 283b83d

Please sign in to comment.