Output¶

class pygna.output.Output(network_filename: str, output_table_results_file: str, analysis: str, geneset_file: str, setnames: list, geneset_file_B: str = None, setnames_B: list = None)[source]¶

This class is used to write different kinds of data to files

add_GMT_entry(key: str, descriptor: str, gene_list: str) → None[source]¶

Add a gmt entry in the GMT file

Parameters:

key – the key name to store
descriptor – the descriptor of the gene list
gene_list – the gene list to write

Example

>>> import pygna.reading_class as rc
>>> geneset = rc.ReadGmt("geneset_file.csv").get_geneset("brca")
>>> setnames = [key for key in geneset.keys()]
>>> network = rc.ReadTsv("network_file.tsv").get_network()
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> for setname, item in geneset.items():
...     item = set(item)
...     module = nx.subgraph(network, item)
...     lcc = sorted(list(nx.connected_components(module)), key=len, reverse=True)[0]
...     out.add_GMT_entry("brca", "topology_module", lcc)

close_temporary_table() → None[source]¶

Remove the temporary file

Example

>>> setnames = ["A", "B", "C"]
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> out.create_st_table_empirical()
>>> out.close_temporary_table()

create_GMT_output(output_gmt: str) → None[source]¶

Write the GMT line on the GMT file

Parameters:	output_gmt – the GMT to print

Example

>>> import pygna.reading_class as rc
>>> geneset = rc.ReadGmt("geneset_file.csv").get_geneset("brca")
>>> setnames = [key for key in geneset.keys()]
>>> network = rc.ReadTsv("network_file.tsv").get_network()
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> for setname, item in geneset.items():
...     item = set(item)
...     module = nx.subgraph(network, item)
...     lcc = sorted(list(nx.connected_components(module)), key=len, reverse=True)[0]
...     out.add_GMT_entry("brca", "topology_module", lcc)
>>> out.create_GMT_output("output_lcc.gmt")

create_comparison_table_empirical() → None[source]¶

Write the headings for the comparison table

Example

>>> setnames = ["A", "B", "C"]
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> out.create_comparison_table_empirical()

create_st_table_empirical() → None[source]¶

Create the headings of the table that is going to be written in the csv file

Example

>>> setnames = ["A", "B", "C"]
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> out.create_st_table_empirical()

set_diffusion_matrix(diffusion_matrix_file: str) → None[source]¶

Set the diffusion matrix file

Parameters:	diffusion_matrix_file – set the diffusion matrix file to use

Example

>>> setnames = ["A", "B", "C"]
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> out.set_diffusion_matrix("diffusion_matrix.csv")

update_comparison_table_empirical(setname_A: str, setname_B: str, n_geneset_A: int, n_mapped_A: int, n_geneset_B: int, n_mapped_B: int, n_overlaps: int, number_of_permutations: int, observed: int, empirical_pvalue: float, mean_null: numpy.mean, var_null: numpy.var) → None[source]¶

Update the content of the comparison table, adding a new row on the file

Parameters:

setname_A – the name of the geneset A
setname_B – the name of the geneset B
n_geneset_A – the number of genes in the geneset A
n_mapped_A – the number of mapped genes in geneset A
n_geneset_B – the number of genes in the geneset B
n_mapped_B – the number of mapped genes in geneset B
n_overlaps – the number of overlaps
number_of_permutations – number of performed permutations
observed – number of observed genes
empirical_pvalue – value of the empirical pvalue
mean_null – mean of the null distribution
var_null – variance of the null distribution

Example

>>> import itertools
>>> import pygna.command as cm
>>> import pygna.reading_class as rc
>>> import pygna.statistical_comparison as sc
>>> geneset_a = rc.ReadGmt("genset_file").get_geneset("brca")
>>> setnames = [key for key in geneset_a.keys()]
>>> network = rc.ReadTsv("network_file.tsv").get_network()
>>> distance_matrix_filename = "distance_matrix.tsv"
>>> in_memory = True
>>> network = nx.Graph(network.subgraph(max(nx.connected_components(network), key=len)))
>>> sp_diz = {"nodes": cm.read_distance_matrix(distance_matrix_filename, in_memory=in_memory)[0],
...           "matrix": cm.read_distance_matrix(distance_matrix_filename, in_memory=in_memory)[1]}
>>> st_comparison = sc.StatisticalComparison(sc.comparison_shortest_path, network, diz=sp_diz, n_proc=2)
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> for pair in itertools.combinations(setnames, 2):
...     observed, pvalue, null_d, a_mapped, b_mapped = st_comparison.comparison_empirical_pvalue(set(geneset_a[pair[0]]), set(geneset_a[pair[1]]), max_iter=number_of_permutations)
...     out.update_comparison_table_empirical(pair[0], pair[1], len(set(geneset_a[pair[0]])), a_mapped, len(set(geneset_a[pair[1]])), b_mapped, n_overlaps, number_of_permutations, observed, pvalue, np.mean(null_d), np.var(null_d))

update_st_table_empirical(setname: str, n_mapped: int, n_geneset: int, number_of_permutations: int, observed: int, empirical_pvalue: float, mean_null: numpy.mean, var_null: numpy.var) → None[source]¶

Update the table content, adding a new line to the file

Parameters:

setname – the name of the geneset
n_mapped – the number of mapped genes
n_geneset – the number of genes in the geneset
number_of_permutations – the number of permutations
observed – value of observed genes
empirical_pvalue – value of the empirical p-value
mean_null – mean of the null distribution
var_null – variance of the null distribution

Example

>>> setnames = ["A", "B", "C"]
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> out.create_st_table_empirical()
>>> out.update_st_table_empirical(setname, n_mapped, n_geneset, number_of_permutations, observed, empirical_pvalue=0.001, mean_null=np.mean(0.11), var_null=np.var(0.2))

pygna.output.print_GMT(gmt_dictionary: dict, output_file: str) → None[source]¶

Save the dictionary on a GMT file

Parameters:

gmt_dictionary – the dictionary containing the data
output_file – the file to save the data

Example

>>> gmt_dict = {"key": "dict_sets"}
>>> print_GMT(gmt_dict, "mygmt.gmt")

pygna.output.apply_multiple_testing_correction(table_file: str, pval_col: str = 'empirical_pvalue', method: str = 'fdr_bh', threshold: float = 0.1) → None[source]¶

Apply the multiple testing correction and save the results in a csv file

Parameters:

table_file – the name of the file to read
pval_col – the name of the column containing the empirical p-value
method – the correction method to use
threshold – the threshold to use in the method

Example

>>> table_filename = "pygna_comparison_results.csv"
>>> apply_multiple_testing_correction(table_filename, pval_col="empirical_pvalue", method="fdr_bh", threshold=0.1)

pygna.output.write_graph_summary(graph: networkx.classes.graph.Graph, output_file: str, net_name: str = None) → None[source]¶

This function takes a graph as input and writes the network properties in a text file

Parameters:

graph – the graph to print
output_file – the name of the file to print
net_name – the name of the network

Example

>>> import pygna.reading_class as rc
>>> text_output = "My summary stats"
>>> network = rc.ReadTsv("mynetwork.tsv").get_network()
>>> write_graph_summary(network, text_output, "mynetwork.tsv")