Sort the wildcard * in ascending numerical order

1

I'm using the `*` wildcard (`glob.glob("out*")`) to collect all the files whose names start with `out`, but the list comes back in arbitrary order:

We have :  ['out0.jpg', 'out2.jpg', 'out4.jpg', 'out5.jpg', 'out1.jpg', 'out6.jpg', 'out3.jpg']

but I would like them to be sorted in ascending order. At the moment I am doing:

for file_name in file_names:
    print("We have : ",file_names)

The complete code is here:

# import the necessary packages
from PIL import Image
import pytesseract
import argparse
import cv2
import os
## split
from PyPDF2 import PdfFileWriter, PdfFileReader
# remove
import sys
# 
from pdf2image import convert_from_path
# import all files with a name
import glob

# functions
def pdfspliterimager(filename):
    """Split a PDF into single pages and save each page as ``out<N>.jpg``.

    For every page index ``i`` of the PDF at *filename*, the page is written
    to a temporary ``document-page<i>.pdf`` in the current directory, that
    file is converted with ``convert_from_path`` and the result is saved as
    ``out<i>.jpg``. The temporary PDF is removed afterwards.

    Args:
        filename: Path of the PDF file to split.
    """
    # Keep the source PDF open for the whole loop (pages are fetched from the
    # reader one by one) and close it deterministically when done.
    with open(filename, "rb") as pdf_file:
        inputpdf = PdfFileReader(pdf_file)
        for i in range(inputpdf.numPages):
            page_pdf = "document-page%s.pdf" % i
            output = PdfFileWriter()
            output.addPage(inputpdf.getPage(i))
            with open(page_pdf, "wb") as outputStream:
                output.write(outputStream)
            try:
                # 500 is the second positional argument of convert_from_path
                # (the rendering DPI in pdf2image).
                pages = convert_from_path(page_pdf, 500)
                for page in pages:
                    page.save('out%s.jpg' % i, 'JPEG')
            finally:
                # Remove the temporary single-page PDF even if conversion fails.
                os.remove(page_pdf)

# build the command-line interface and read the arguments into a dict
ap = argparse.ArgumentParser()
ap.add_argument(
    "-i",
    "--image",
    required=True,
    help="path to input image to be OCR'd",
)
ap.add_argument(
    "-p",
    "--preprocess",
    type=str,
    default="thresh",
    help="type of preprocessing to be done",
)
args = vars(ap.parse_args())

# a PDF input is first split into one JPEG per page (out<N>.jpg)
image_path = args["image"]
if image_path.endswith('.pdf'):
    pdfspliterimager(image_path)

# for all files with out in their name, in ascending page order
def _page_sort_key(file_name):
    """Return a sort key that orders ``out<N>.<ext>`` names by the number N.

    Names whose part after ``out`` is not a plain decimal number are placed
    after all numbered names and ordered alphabetically among themselves.
    """
    stem = os.path.splitext(os.path.basename(file_name))[0]
    suffix = stem[len("out"):]
    if suffix.isdecimal():
        return (0, int(suffix), file_name)
    return (1, 0, file_name)


# glob returns matches in arbitrary order, and a plain string sort would put
# "out10.jpg" before "out2.jpg", so sort on the numeric part explicitly
file_names = sorted(glob.glob("out*"), key=_page_sort_key)
# print the list once instead of once per processed file
print("We have : ", file_names)
for file_name in file_names:
    # load the image and convert it to grayscale
    image = cv2.imread(file_name)
    if image is None:
        # cv2.imread returns None when the file cannot be decoded as an
        # image; "out*" may also match non-image files, so skip those
        print("Skipping unreadable file : ", file_name)
        continue
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # check to see if we should apply thresholding to preprocess the
    # image
    if args["preprocess"] == "thresh":
        gray = cv2.threshold(gray, 0, 255,
            cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    # make a check to see if median blurring should be done to remove
    # noise
    elif args["preprocess"] == "blur":
        gray = cv2.medianBlur(gray, 3)

    # write the grayscale image to disk as a temporary file so we can
    # apply OCR to it
    filename = "{}.png".format(os.getpid())
    cv2.imwrite(filename, gray)

    # load the image as a PIL/Pillow image, apply OCR, and then delete
    # the temporary file
    text = pytesseract.image_to_string(Image.open(filename))
    os.remove(filename)

    # append the recognised text of this page to the result file
    with open('resume.txt', 'a+') as f:
        print('***:', text, file=f)

Maybe there are some simpler ways to do it.

    
asked by ThePassenger 04.11.2018 в 13:04
source

1 answer

1

You can use the sorted function and return the ordered list !!

import re


def numeric_key(file_name):
    """Return a sort key that orders names by the first number they contain.

    Plain ``sorted()`` compares strings character by character, so
    ``'out10.jpg'`` would come before ``'out2.jpg'``. Comparing the number as
    an ``int`` gives true ascending numerical order. Names without any digits
    are placed last; the name itself breaks ties.
    """
    match = re.search(r"\d+", file_name)
    number = int(match.group()) if match else float("inf")
    return (number, file_name)


file_names = ['out0.jpg', 'out2.jpg', 'out4.jpg', 'out5.jpg', 'out1.jpg', 'out6.jpg', 'out3.jpg']

file_names_ordenado = sorted(file_names, key=numeric_key)
print(file_names_ordenado)

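Because the names in your list differ only by a single-digit number, the plain sorted() function is enough here. Keep in mind that sorted() compares strings character by character, so with ten or more files 'out10.jpg' would come before 'out2.jpg'; in that case sort on the number itself, e.g. sorted(file_names, key=lambda name: int(name[3:-4])).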

Greetings!

    
answered by 04.11.2018 / 15:54
source