# pip-code-to-doc
[pipableAi](https://www.linkedin.com/company/pipable.ai/about/)
[colab_notebook](https://colab.research.google.com/drive/17PyMU_3QN9LROy7x-jmaema0cuLRzBvc?usp=sharing)
## What have we built?
A 1.3B-parameter code documentation model that outperforms most models at documenting code and making your in-house libraries ready for LLM and RAG pipelines.
We have also open sourced a [parsing lib](https://github.com/PipableAI/pip-library-parser) for the same purpose. Together, the library and the model can turn your codebase into a functional parse tree, ready to be consumed by LLMs to execute complex tasks.
This is a further trained version of pip-sql-1.3b.
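As a rough illustration of the idea (this sketch uses only the standard library and is not the pip-library-parser API), a codebase can be reduced to a tree of function names and sources with Python's built-in `ast` module:
```python
import ast
import pathlib

def build_function_tree(root):
    # Map each .py file under `root` to the functions it defines,
    # a flat stand-in for the richer parse tree the library produces
    tree = {}
    for path in pathlib.Path(root).rglob("*.py"):
        source = path.read_text()
        for node in ast.walk(ast.parse(source)):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                tree.setdefault(str(path), []).append(
                    {"name": node.name, "source": ast.get_source_segment(source, node)}
                )
    return tree
```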
## How did we build it?
We used softmax cross-entropy and a modified form of policy gradient, together with a Q loss, optimized in an EM setup.
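The exact training objective is not spelled out here, but as a rough sketch of how these three terms can be combined in PyTorch (the function name, weightings, and the construction of `reward`, `q_pred`, and `q_target` are all illustrative assumptions, not the actual training code):
```python
import torch.nn.functional as F

def combined_loss(logits, target_ids, reward, q_pred, q_target,
                  pg_weight=1.0, q_weight=1.0):
    """Assumed shape of the combined objective: CE + policy gradient + Q loss."""
    # Supervised term: softmax cross-entropy against reference doc tokens
    # logits: (batch, seq, vocab), target_ids: (batch, seq)
    ce_loss = F.cross_entropy(logits.view(-1, logits.size(-1)), target_ids.view(-1))
    # Policy-gradient term: reward-weighted negative log-likelihood of the
    # sampled tokens (REINFORCE-style); reward: (batch,)
    token_log_probs = F.log_softmax(logits, dim=-1).gather(
        -1, target_ids.unsqueeze(-1)).squeeze(-1)
    pg_loss = -(reward * token_log_probs.sum(dim=-1)).mean()
    # Q loss: regress a value estimate toward a bootstrapped target
    q_loss = F.mse_loss(q_pred, q_target)
    # In an EM-style loop, one would alternate between re-estimating
    # reward / q_target (E-step) and minimizing this loss (M-step)
    return ce_loss + pg_weight * pg_loss + q_weight * q_loss
```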
## License
The model is open source under the Apache 2.0 license.
## Usage
### Library use
```python
!pip3 install git+https://github.com/PipableAI/pip-library-parser
!pip3 install atlassian-python-api
from pip_library_parser import CodeToDocGenerator
from atlassian import Jira
import torch
torch.set_default_device("cuda")
# Instantiate the CodeToDocGenerator
generator = CodeToDocGenerator()
# Generate docstrings for the module's functions and methods
module = Jira
module_name = "atlassian.Jira"
docs = generator.generate_module_docs(module, module_name)
print(docs)
```
```python
from pip_library_parser import CodeToDocGenerator
# Instantiate the CodeToDocGenerator
generator = CodeToDocGenerator()
code_snippet = """
def example_function(x):
    return x * 2
"""
docstring = generator.generate_docstring_from_pip_model(code_snippet)
print("Generated Docstring:")
print(docstring)
```
### Installation
```bash
pip install transformers
```
### Prompt
```python
prompt = f"""<function_code>{code}</function_code>
<question>Give one line description of the python code above in natural language.</question>
<doc>"""
```
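For example, the template can be filled from a live Python object using the standard `inspect` module (`build_prompt` below is a hypothetical helper, not part of the library):
```python
import inspect

def build_prompt(fn):
    # Wrap a function's source in the tags the model was trained on
    code = inspect.getsource(fn)
    return (f"<function_code>{code}</function_code>\n"
            "<question>Give one line description of the python code above "
            "in natural language.</question>\n<doc>")
```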
### PyTorch
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda"
model = AutoModelForCausalLM.from_pretrained("PipableAI/pip-code-to-doc-1.3b").to(device)
tokenizer = AutoTokenizer.from_pretrained("PipableAI/pip-code-to-doc-1.3b")
prompt = f"""
<function_code>
def example_function(x):
    return x * 2
</function_code>
<question>Give one line description of the python code above in natural language.</question>
<doc>"""
inputs = tokenizer(prompt, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=300)
doc = tokenizer.decode(outputs[0], skip_special_tokens=True).split('<doc>')[-1].split('</doc>')[0]
print(doc)
```
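The model emits its answer between `<doc>` and `</doc>` tags, which is why the decoded output is split on those markers to recover just the generated description.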
## Examples
### Prompt
```python
<function_code>
###########################
# Generate Analytical Model
###########################
##################################################
# func: get_np_array_transition_probability_matrix
##################################################
def get_np_array_transition_probability_matrix(int_num_states, np_array_A_matrix):
    print('np_array_A_matrix:')
    print(np_array_A_matrix)
    #####################################################
    # Perturb the adjacency matrix to avoid singularities
    #####################################################
    np_array_A_matrix += (np.full((int_num_states, int_num_states), float_eps) - (np.identity(int_num_states) * float_eps))
    print('np_array_A_matrix:')
    print(np_array_A_matrix)
    print('np_array_D_matrix:')
    np_array_D_matrix = np.diag(np.sum(np_array_A_matrix, axis=1))
    print(np_array_D_matrix)
    print('np_array_D_matrix_inv:')
    np_array_D_matrix_inv = np.linalg.inv(np_array_D_matrix)
    print(np_array_D_matrix_inv)
    print('\n\n')
    print('np_array_P_matrix:')
    np_array_P_matrix = np.dot(np_array_D_matrix_inv, np_array_A_matrix)
    print(np_array_P_matrix)
    print('np.sum(np_array_P_matrix, axis=1):')
    print(np.sum(np_array_P_matrix, axis=1))
    print('\n\n')
    return np_array_P_matrix

##################################################
# func: get_np_array_perron_frobenius_eigen_vector
##################################################
def get_np_array_perron_frobenius_matrix(int_num_states, np_array_P_matrix):
    np_array_perron_frobenius_matrix = np.linalg.matrix_power(np_array_P_matrix, 1000)
    np_array_perron_frobenius_vector = np_array_perron_frobenius_matrix[0, :]
    print('np_array_perron_frobenius_matrix:')
    print(np_array_perron_frobenius_matrix)
    print('np.sum(np_array_perron_frobenius_matrix, axis=1):')
    print(np.sum(np_array_perron_frobenius_matrix, axis=1))
    print('np.sum(np_array_perron_frobenius_matrix, axis=0):')
    print(np.sum(np_array_perron_frobenius_matrix, axis=0))
    print('np.sum(np_array_perron_frobenius_matrix, axis=0)/int_num_states:')
    print(np.sum(np_array_perron_frobenius_matrix, axis=0)/int_num_states)
    print('np.dot(np_array_perron_frobenius_vector, np_array_P_matrix):')
    print(np.dot(np_array_perron_frobenius_vector, np_array_P_matrix))
    print('np_array_perron_frobenius_vector:')
    print(np_array_perron_frobenius_vector)
    print('\n\n')
    return np_array_perron_frobenius_vector, np_array_perron_frobenius_matrix

#############################
# func: get_np_array_Z_matrix
#############################
def get_np_array_Z_matrix(int_num_states, np_array_P_matrix, np_array_perron_frobenius_matrix):
    np_array_Z_matrix = np.linalg.inv(np.identity(int_num_states) - np_array_P_matrix + np_array_perron_frobenius_matrix)
    print('np_array_Z_matrix:')
    print(np_array_Z_matrix)
    print('\n\n')
    return np_array_Z_matrix

#############################
# func: get_np_array_H_matrix
#############################
def get_np_array_H_matrix(int_num_states, np_array_Z_matrix, np_array_perron_frobenius_vector):
    np_array_H_matrix = np.zeros([int_num_states, int_num_states])
    for i in range(int_num_states):
        for j in range(int_num_states):
            np_array_H_matrix[i][j] = (np_array_Z_matrix[j][j] - np_array_Z_matrix[i][j]) / np_array_perron_frobenius_vector[j]
    print('np_array_H_matrix:')
    print(np_array_H_matrix)
    print('\n\n')
    return np_array_H_matrix

###########
# func: run
###########
def run(np_array_A_matrix):
    int_num_states = len(np_array_A_matrix)
    np_array_P_matrix = get_np_array_transition_probability_matrix(int_num_states, np_array_A_matrix)
    np_array_perron_frobenius_vector, np_array_perron_frobenius_matrix = get_np_array_perron_frobenius_matrix(int_num_states, np_array_P_matrix)
    np_array_Z_matrix = get_np_array_Z_matrix(int_num_states, np_array_P_matrix, np_array_perron_frobenius_matrix)
    np_array_H_matrix = get_np_array_H_matrix(int_num_states, np_array_Z_matrix, np_array_perron_frobenius_vector)
    return np_array_H_matrix
</function_code>
<question>Give one line description of the python code above in natural language.</question>
<doc>
```
### Response
```txt
The given python code is a function that calculates the transition probability matrix, P, for a given adjacency matrix A, and then uses these matrices to calculate the Perron-Frobenius eigenvector and its inverse matrix Z, and finally, the H matrix which is the inverse of the Z matrix. The H matrix is then returned as the output of the function. The adjacency matrix A is a square matrix where each element at position (i, j) represents the probability of transitioning from state i to state j. The function first perturbs the adjacency matrix to avoid singularities, then calculates the transition probability matrix P, the Perron-Frobenius eigenvector and its inverse matrix Z, and finally, the H matrix. The H matrix is then returned as the output of the function.
```
### Team
Avi Kothari, Gyan Ranjan, Pratham Gupta, Ritvik Aryan Kalra, Soham Acharya