Sign Up

Have an account? Sign In Now

Sign In

Forgot Password?

Don't have account, Sign Up Here

Forgot Password

Lost your password? Please enter your email address. You will receive a link and will create a new password via email.

Have an account? Sign In Now

You must login to ask question.

Forgot Password?

Need An Account, Sign Up Here

Please briefly explain why you feel this question should be reported.

Please briefly explain why you feel this answer should be reported.

Sign InSign Up

Softans

Softans Logo Softans Logo
Search
Ask A Question

Mobile menu

Close
Ask a Question
  • Home
  • Add group
  • Groups page
  • Communities
  • Questions
    • New Questions
    • Trending Questions
    • Must read Questions
    • Hot Questions
  • Polls
  • Tags
  • Badges
  • Users
  • Help
Home/ Questions/Q 1535
In Process
Ashwin
Ashwin
Asked: March 1, 20222022-03-01T04:49:32+00:00 2022-03-01T04:49:32+00:00

Combine two python codes doesn’t work well

I have two python codes. The first code is designed to split pdf to images then crop each image into four parts and the final output of images is put in Stickers folder
from pathlib import Path
import shutil
import fitz
from PIL import Image
import cv2, os

def crop(image_path, coords, saved_location):
    image_obj = Image.open(image_path)
    cropped_image = image_obj.crop(coords)
    cropped_image.save(saved_location)

def all_white_pixels(img):
    H, W = img.shape[:2]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    pixels = cv2.countNonZero(thresh)
    return True if pixels == (H * W) else False

BASE_DIR = Path.cwd()
OUTPUT_DIR = BASE_DIR / "Output"
STICKERS_DIR = BASE_DIR / "Stickers"

try:
    shutil.rmtree(OUTPUT_DIR)
except OSError as e:
    pass
try:
    shutil.rmtree(STICKERS_DIR)
except:
    pass
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
STICKERS_DIR.mkdir(parents=True, exist_ok=True)

doc = fitz.open("Sample.pdf")
for page in doc:
    pix = page.get_pixmap()
    img = "Output/Page-%i.png" % page.number
    pix.save(img)
    s1 = 'Stickers/Page-%i-Cropped1.png' % page.number
    s2 = 'Stickers/Page-%i-Cropped2.png' % page.number
    s3 = 'Stickers/Page-%i-Cropped3.png' % page.number
    s4 = 'Stickers/Page-%i-Cropped4.png' % page.number
    crop(img, (34, 6, 588, 192), s1)
    crop(img, (34, 192, 588, 373), s2)
    crop(img, (34, 373, 588, 550), s3)
    crop(img, (34, 550, 588, 730), s4)
    img1 = cv2.imread(s1)
    if all_white_pixels(img1):
        os.remove(s1)
    img2 = cv2.imread(s2)
    if all_white_pixels(img2):
        os.remove(s2)
    img3 = cv2.imread(s3)
    if all_white_pixels(img3):
        os.remove(s3)
    img4 = cv2.imread(s4)
    if all_white_pixels(img4):
        os.remove(s4)

And I have another python code that reads the numbers on the stickers then rename those images according to the number

import os, pytesseract
import cv2

sTemp = "Temp.png"
directory = '.\Stickers'

def useMagick(img):
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
    command = 'magick convert {} -resize 2048x640 -density 200 -quality 100 {}'.format(img, sTemp)
    os.system(command)

def readNumber(img):
    img = cv2.imread(img)
    gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    txt = pytesseract.image_to_string(gry)
    try:
        return re.findall(r'\d+\s?\/\s?(\d+)', txt)[0]
    except:
        blur = cv2.GaussianBlur(gry, (3,3), 0)
        txt = pytesseract.image_to_string(blur)
        try:
            return re.findall(r'\d+\s?\/\s?(\d+)', txt)[0]
        except:
            return "REVIEW"

for filename in os.listdir(directory):
    if filename.endswith(".png"):
        sPath = os.path.join(directory, filename)
        useMagick(sPath)
        x = readNumber(sTemp)
        try:
            print(x)
            try:
                os.rename(sPath, os.path.join(os.getcwd(), directory, x + '.png'))
            except:
                n+=1
                os.rename(sPath, os.path.join(os.getcwd(), directory, x + '_' + str(n) + '.png'))
            continue
        except:
            print(sPath)
    else:
        continue

Both codes are working very well but separately. When trying to combine both of them, I didn’t get the desired output and the files are not renamed. Here’s my final code that didn’t work well

from pathlib import Path
import shutil
import fitz
from PIL import Image
import cv2, os, pytesseract
import time

def crop(image_path, coords, saved_location):
    image_obj = Image.open(image_path)
    cropped_image = image_obj.crop(coords)
    cropped_image.save(saved_location)

def all_white_pixels(img):
    H, W = img.shape[:2]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    pixels = cv2.countNonZero(thresh)
    return True if pixels == (H * W) else False

BASE_DIR = Path.cwd()
OUTPUT_DIR = BASE_DIR / "Output"
STICKERS_DIR = BASE_DIR / "Stickers"

try:
    shutil.rmtree(OUTPUT_DIR)
except OSError as e:
    pass
try:
    shutil.rmtree(STICKERS_DIR)
except:
    pass
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
STICKERS_DIR.mkdir(parents=True, exist_ok=True)

doc = fitz.open("Sample.pdf")
for page in doc:
    pix = page.get_pixmap()
    img = "Output/Page-%i.png" % page.number
    pix.save(img)
    s1 = 'Stickers/Page-%i-Cropped1.png' % page.number
    s2 = 'Stickers/Page-%i-Cropped2.png' % page.number
    s3 = 'Stickers/Page-%i-Cropped3.png' % page.number
    s4 = 'Stickers/Page-%i-Cropped4.png' % page.number
    crop(img, (34, 6, 588, 192), s1)
    crop(img, (34, 192, 588, 373), s2)
    crop(img, (34, 373, 588, 550), s3)
    crop(img, (34, 550, 588, 730), s4)
    img1 = cv2.imread(s1)
    if all_white_pixels(img1):
        os.remove(s1)
    img2 = cv2.imread(s2)
    if all_white_pixels(img2):
        os.remove(s2)
    img3 = cv2.imread(s3)
    if all_white_pixels(img3):
        os.remove(s3)
    img4 = cv2.imread(s4)
    if all_white_pixels(img4):
        os.remove(s4)

time.sleep(5)
sTemp = "Temp.png"
directory = '.\Stickers'

def useMagick(img):
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
    command = 'magick convert {} -resize 2048x640 -density 200 -quality 100 {}'.format(img, sTemp)
    os.system(command)

def readNumber(img):
    img = cv2.imread(img)
    gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    txt = pytesseract.image_to_string(gry)
    try:
        return re.findall(r'\d+\s?\/\s?(\d+)', txt)[0]
    except:
        blur = cv2.GaussianBlur(gry, (3,3), 0)
        txt = pytesseract.image_to_string(blur)
        try:
            return re.findall(r'\d+\s?\/\s?(\d+)', txt)[0]
        except:
            return "REVIEW"

for filename in os.listdir(directory):
    if filename.endswith(".png"):
        sPath = os.path.join(directory, filename)
        useMagick(sPath)
        x = readNumber(sTemp)
        try:
            print(x)
            try:
                os.rename(sPath, os.path.join(os.getcwd(), directory, x + '.png'))
            except:
                n+=1
                os.rename(sPath, os.path.join(os.getcwd(), directory, x + '_' + str(n) + '.png'))
            continue
        except:
            print(sPath)
    else:
        continue

The code is working as for the first (the pdf is split into images and the images are split into chops) but it doesn’t work as for renaming the images according to the numbers extracted by ocr Here’s a sample pdf file to test the codes on it https://www.mediafire.com/file/cy6xwjkhc1td2zg/Sample.pdf/file

python


python
  • 0
  • 1 1 Answer
  • 30 Views
  • 0 Followers
  • 0
Answer
Share
  • Facebook
  • Report

1 Answer

  • Voted
  • Oldest
  • Recent
  1. Jack
    2022-03-10T13:02:56+00:00Added an answer on March 10, 2022 at 1:02 pm

    Code:

    from pathlib import Path
    import shutil
    import fitz
    from PIL import Image
    import cv2, os
    
    def crop(image_path, coords, saved_location):
        image_obj = Image.open(image_path)
        cropped_image = image_obj.crop(coords)
        cropped_image.save(saved_location)
    
    def all_white_pixels(img):
        H, W = img.shape[:2]
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        pixels = cv2.countNonZero(thresh)
        return True if pixels == (H * W) else False
    
    BASE_DIR = Path.cwd()
    OUTPUT_DIR = BASE_DIR / "Output"
    STICKERS_DIR = BASE_DIR / "Stickers"
    
    try:
        shutil.rmtree(OUTPUT_DIR)
    except OSError as e:
        pass
    try:
        shutil.rmtree(STICKERS_DIR)
    except:
        pass
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    STICKERS_DIR.mkdir(parents=True, exist_ok=True)
    
    doc = fitz.open("Sample.pdf")
    for page in doc:
        pix = page.get_pixmap()
        img = "Output/Page-%i.png" % page.number
        pix.save(img)
        s1 = 'Stickers/Page-%i-Cropped1.png' % page.number
        s2 = 'Stickers/Page-%i-Cropped2.png' % page.number
        s3 = 'Stickers/Page-%i-Cropped3.png' % page.number
        s4 = 'Stickers/Page-%i-Cropped4.png' % page.number
        crop(img, (34, 6, 588, 192), s1)
        crop(img, (34, 192, 588, 373), s2)
        crop(img, (34, 373, 588, 550), s3)
        crop(img, (34, 550, 588, 730), s4)
        img1 = cv2.imread(s1)
        if all_white_pixels(img1):
            os.remove(s1)
        img2 = cv2.imread(s2)
        if all_white_pixels(img2):
            os.remove(s2)
        img3 = cv2.imread(s3)
        if all_white_pixels(img3):
            os.remove(s3)
        img4 = cv2.imread(s4)
        if all_white_pixels(img4):
            os.remove(s4)

    And I have another python code that reads the numbers on the stickers then rename those images according to the number

    Code:

    import os, pytesseract
    import cv2
    
    sTemp = "Temp.png"
    directory = '.\Stickers'
    
    def useMagick(img):
        pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
        command = 'magick convert {} -resize 2048x640 -density 200 -quality 100 {}'.format(img, sTemp)
        os.system(command)
    
    def readNumber(img):
        img = cv2.imread(img)
        gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        txt = pytesseract.image_to_string(gry)
        try:
            return re.findall(r'\d+\s?\/\s?(\d+)', txt)[0]
        except:
            blur = cv2.GaussianBlur(gry, (3,3), 0)
            txt = pytesseract.image_to_string(blur)
            try:
                return re.findall(r'\d+\s?\/\s?(\d+)', txt)[0]
    
     return "REVIEW"
    
    for filename in os.listdir(directory):
        if filename.endswith(".png"):
            sPath = os.path.join(directory, filename)
            useMagick(sPath)
            x = readNumber(sTemp)
            try:
                print(x)
                try:
                    os.rename(sPath, os.path.join(os.getcwd(), directory, x + '.png'))
                except:
                    n+=1
                    os.rename(sPath, os.path.join(os.getcwd(), directory, x + '_' + str(n) + '.png'))
                continue
            except:
                print(sPath)
        else:
            continue
    
    Both codes are working very well but separately. When trying to combine both of them, I didn't get the desired output and the files are not renamed. Here's my final code that didn't work well
    
    

    Code:

    from pathlib import Path
    import shutil
    import fitz
    from PIL import Image
    import cv2, os, pytesseract
    import time
    
    def crop(image_path, coords, saved_location):
        image_obj = Image.open(image_path)
        cropped_image = image_obj.crop(coords)
        cropped_image.save(saved_location)
    
    def all_white_pixels(img):
        H, W = img.shape[:2]
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        pixels = cv2.countNonZero(thresh)
        return True if pixels == (H * W) else False
    
    BASE_DIR = Path.cwd()
    OUTPUT_DIR = BASE_DIR / "Output"
    STICKERS_DIR = BASE_DIR / "Stickers"
    except:
    
    try:
        shutil.rmtree(OUTPUT_DIR)
    except OSError as e:
        pass
    try:
        shutil.rmtree(STICKERS_DIR)
    except:
        pass
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    STICKERS_DIR.mkdir(parents=True, exist_ok=True)
    
    doc = fitz.open("Sample.pdf")
    for page in doc:
        pix = page.get_pixmap()
        img = "Output/Page-%i.png" % page.number
        pix.save(img)
        s1 = 'Stickers/Page-%i-Cropped1.png' % page.number
        s2 = 'Stickers/Page-%i-Cropped2.png' % page.number
        s3 = 'Stickers/Page-%i-Cropped3.png' % page.number
        s4 = 'Stickers/Page-%i-Cropped4.png' % page.number
        crop(img, (34, 6, 588, 192), s1)
        crop(img, (34, 192, 588, 373), s2)
        crop(img, (34, 373, 588, 550), s3)
        crop(img, (34, 550, 588, 730), s4)
        img1 = cv2.imread(s1)
        if all_white_pixels(img1):
            os.remove(s1)
        img2 = cv2.imread(s2)
        if all_white_pixels(img2):
            os.remove(s2)
        img3 = cv2.imread(s3)
        if all_white_pixels(img3):
            os.remove(s3)
        img4 = cv2.imread(s4)
        if all_white_pixels(img4):
            os.remove(s4)
    
    time.sleep(5)
    sTemp = "Temp.png"
    directory = '.\Stickers'
    
    def useMagick(img):
        pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
        command = 'magick convert {} -resize 2048x640 -density 200 -quality 100 {}'.format(img, sTemp)
        os.system(command)
    
    def readNumber(img):
        img = cv2.imread(img)
        gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        txt = pytesseract.image_to_string(gry)
        try:
            return re.findall(r'\d+\s?\/\s?(\d+)', txt)[0]
        except:
            blur = cv2.GaussianBlur(gry, (3,3), 0)
            txt = pytesseract.image_to_string(blur)
            try:
                return re.findall(r'\d+\s?\/\s?(\d+)', txt)[0]
            except:
                return "REVIEW"
    
    for filename in os.listdir(directory):
        if filename.endswith(".png"):
            sPath = os.path.join(directory, filename)
            useMagick(sPath)
            x = readNumber(sTemp)
            try:
                print(x)
                try:
                    os.rename(sPath, os.path.join(os.getcwd(), directory, x + '.png'))
                except:
                    n+=1
                    os.rename(sPath, os.path.join(os.getcwd(), directory, x + '_' + str(n) + '.png'))
                continue
            except:
                print(sPath)
        else:
            continue
    • 0
    • Reply
    • Share
      Share
      • Share on Facebook
      • Share on Twitter
      • Share on LinkedIn
      • Share on WhatsApp
      • Report

Leave an answer
Cancel reply

You must login to add an answer.

Forgot Password?

Need An Account, Sign Up Here

Sidebar

Ask A Question
  • Popular
  • Answers
  • Ghulam Nabi

    Why are the British confused about us calling bread rolls ...

    • 5 Answers
  • Ghulam Nabi

    Is this statement, “i see him last night” can be ...

    • 4 Answers
  • Alex

    application has failed to start because no appropriate graphics hardware ...

    • 4 Answers
  • Ghulam Nabi
    Ghulam Nabi added an answer To resolve the NullPointerException, you need to identify the variable… March 15, 2023 at 8:25 am
  • Ghulam Nabi
    Ghulam Nabi added an answer You can replace the PnP code in your Azure Function… February 13, 2023 at 7:11 am
  • Ghulam Nabi
    Ghulam Nabi added an answer You can use the $match stage in the aggregate pipeline… February 10, 2023 at 6:20 am

Trending Tags

android c++ cypress flutter java javascript python selenium testng webdriver

Top Members

Robert

Robert

  • 3 Questions
  • 1k Points
Luci

Luci

  • 5 Questions
  • 1k Points
Kevin O Brien

Kevin O Brien

  • 2 Questions
  • 1k Points

Explore

  • Home
  • Add group
  • Groups page
  • Communities
  • Questions
    • New Questions
    • Trending Questions
    • Must read Questions
    • Hot Questions
  • Polls
  • Tags
  • Badges
  • Users
  • Help

Footer

Softans

Softans is a social questions & Answers Engine which will help you establish your community and connect with other people.

About Us

  • Blog
  • Jobs
  • About Us
  • Meet The Team
  • Contact Us

Legal Stuff

Help

Follow

© 2021 Softans. All Rights Reserved
With Love by Softans.

Insert/edit link

Enter the destination URL

Or link to existing content

    No search term specified. Showing recent items. Search or use up and down arrow keys to select an item.